diff --git a/vmstools/DESCRIPTION b/vmstools/DESCRIPTION index 5e2c8ec..d03898b 100644 --- a/vmstools/DESCRIPTION +++ b/vmstools/DESCRIPTION @@ -11,3 +11,4 @@ Imports: cluster, data.table, doBy, maps, mapdata, maptools, PBSmapping, sp, seg Suggests: lattice, raster, shapefiles, tcltk Repository: CRAN LazyLoad: Yes +Roxygen: list(markdown = TRUE) diff --git a/vmstools/R/CSquare.r b/vmstools/R/CSquare.r index 6c77455..a6463d8 100644 --- a/vmstools/R/CSquare.r +++ b/vmstools/R/CSquare.r @@ -1,46 +1,70 @@ -CSquare <- function(lon,lat,degrees){ - - if(length(lon) != length(lat)) stop("length of longitude not equal to length of latitude") - if(!degrees %in% c(10,5,1,0.5,0.1,0.05,0.01)) stop("degrees specified not in range: c(10,5,1,0.5,0.1,0.05,0.01)") - - dims <- length(lon) - - quadrants <- array(NA,dim=c(4,6,dims),dimnames=list(c("globalQuadrant","intmQuadrant1","intmQuadrant2","intmQuadrant3"),c("quadrantDigit","latDigit","lonDigit","latRemain","lonRemain","code"),seq(1,dims,1))) - - quadrants["globalQuadrant","quadrantDigit",] <- 4-(((2*floor(1+(lon/200)))-1)*((2*floor(1+(lat/200)))+1)) - quadrants["globalQuadrant","latDigit",] <- floor(abs(lat)/10) - quadrants["globalQuadrant","lonDigit",] <- floor(abs(lon)/10) - quadrants["globalQuadrant","latRemain",] <- round(abs(lat)-(quadrants["globalQuadrant","latDigit",]*10),7) - quadrants["globalQuadrant","lonRemain",] <- round(abs(lon)-(quadrants["globalQuadrant","lonDigit",]*10),7) - quadrants["globalQuadrant","code",] <- quadrants["globalQuadrant","quadrantDigit",]*1000+quadrants["globalQuadrant","latDigit",]*100+quadrants["globalQuadrant","lonDigit",] - - quadrants["intmQuadrant1","quadrantDigit",] <- (2*floor(quadrants["globalQuadrant","latRemain",]*0.2))+floor(quadrants["globalQuadrant","lonRemain",]*0.2)+1 - quadrants["intmQuadrant1","latDigit",] <- floor(quadrants["globalQuadrant","latRemain",]) - quadrants["intmQuadrant1","lonDigit",] <- floor(quadrants["globalQuadrant","lonRemain",]) - 
quadrants["intmQuadrant1","latRemain",] <- round((quadrants["globalQuadrant","latRemain",]-quadrants["intmQuadrant1","latDigit",])*10,7) - quadrants["intmQuadrant1","lonRemain",] <- round((quadrants["globalQuadrant","lonRemain",]-quadrants["intmQuadrant1","lonDigit",])*10,7) - quadrants["intmQuadrant1","code",] <- quadrants["intmQuadrant1","quadrantDigit",]*100+quadrants["intmQuadrant1","latDigit",]*10+quadrants["intmQuadrant1","lonDigit",] - - quadrants["intmQuadrant2","quadrantDigit",] <- (2*floor(quadrants["intmQuadrant1","latRemain",]*0.2))+floor(quadrants["intmQuadrant1","lonRemain",]*0.2)+1 - quadrants["intmQuadrant2","latDigit",] <- floor(quadrants["intmQuadrant1","latRemain",]) - quadrants["intmQuadrant2","lonDigit",] <- floor(quadrants["intmQuadrant1","lonRemain",]) - quadrants["intmQuadrant2","latRemain",] <- round((quadrants["intmQuadrant1","latRemain",]-quadrants["intmQuadrant2","latDigit",])*10,7) - quadrants["intmQuadrant2","lonRemain",] <- round((quadrants["intmQuadrant1","lonRemain",]-quadrants["intmQuadrant2","lonDigit",])*10,7) - quadrants["intmQuadrant2","code",] <- quadrants["intmQuadrant2","quadrantDigit",]*100+quadrants["intmQuadrant2","latDigit",]*10+quadrants["intmQuadrant2","lonDigit",] - - quadrants["intmQuadrant3","quadrantDigit",] <- (2*floor(quadrants["intmQuadrant2","latRemain",]*0.2))+floor(quadrants["intmQuadrant2","lonRemain",]*0.2)+1 - quadrants["intmQuadrant3","latDigit",] <- floor(quadrants["intmQuadrant2","latRemain",]) - quadrants["intmQuadrant3","lonDigit",] <- floor(quadrants["intmQuadrant2","lonRemain",]) - quadrants["intmQuadrant3","latRemain",] <- round((quadrants["intmQuadrant2","latRemain",]-quadrants["intmQuadrant3","latDigit",])*10,7) - quadrants["intmQuadrant3","lonRemain",] <- round((quadrants["intmQuadrant2","lonRemain",]-quadrants["intmQuadrant3","lonDigit",])*10,7) - quadrants["intmQuadrant3","code",] <- 
quadrants["intmQuadrant3","quadrantDigit",]*100+quadrants["intmQuadrant3","latDigit",]*10+quadrants["intmQuadrant3","lonDigit",] - - if(degrees == 10) CSquareCodes <- quadrants["globalQuadrant","code",] - if(degrees == 5) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","quadrantDigit",],sep="") - if(degrees == 1) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],sep="") - if(degrees == 0.5) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","quadrantDigit",],sep="") - if(degrees == 0.1) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","code",],sep="") - if(degrees == 0.05) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","code",],":",quadrants["intmQuadrant3","quadrantDigit",],sep="") - if(degrees == 0.01) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","code",],":",quadrants["intmQuadrant3","code",],sep="") - -return(CSquareCodes)} \ No newline at end of file +#' Calculate the CSquare notation from the GPS locations +#' +#' Compute the CSquare notation from the GPS location where you can define what +#' resolution you want the CSquare notation to be. +#' +#' +#' @param lon Longitudes of points +#' @param lat Latitudes of points +#' @param degrees Resolution of CSquare notation: 10, 5, 1, 0.5, 0.1, 0.05, +#' 0.01 +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{km2Degree}}, \code{\link{degree2Km}}, +#' \code{\link{lonLatRatio}}, \code{\link{distance}} +#' @references EU Lot 2 project, based on CSquare: +#' http://www.marine.csiro.au/csquares/ +#' @examples +#' +#' lon <- -4 +#' lat <- 50 +#' degrees <- 0.01 +#' +#' CSquare(lon,lat,degrees) # "7500:104:100:100" +#' +#' @export CSquare +CSquare <- function(lon,lat,degrees){ + + if(length(lon) != length(lat)) stop("length of longitude not equal to length of latitude") + if(!degrees %in% c(10,5,1,0.5,0.1,0.05,0.01)) stop("degrees specified not in range: c(10,5,1,0.5,0.1,0.05,0.01)") + + dims <- length(lon) + + quadrants <- array(NA,dim=c(4,6,dims),dimnames=list(c("globalQuadrant","intmQuadrant1","intmQuadrant2","intmQuadrant3"),c("quadrantDigit","latDigit","lonDigit","latRemain","lonRemain","code"),seq(1,dims,1))) + + quadrants["globalQuadrant","quadrantDigit",] <- 4-(((2*floor(1+(lon/200)))-1)*((2*floor(1+(lat/200)))+1)) + quadrants["globalQuadrant","latDigit",] <- floor(abs(lat)/10) + quadrants["globalQuadrant","lonDigit",] <- floor(abs(lon)/10) + quadrants["globalQuadrant","latRemain",] <- round(abs(lat)-(quadrants["globalQuadrant","latDigit",]*10),7) + quadrants["globalQuadrant","lonRemain",] <- round(abs(lon)-(quadrants["globalQuadrant","lonDigit",]*10),7) + quadrants["globalQuadrant","code",] <- quadrants["globalQuadrant","quadrantDigit",]*1000+quadrants["globalQuadrant","latDigit",]*100+quadrants["globalQuadrant","lonDigit",] + + quadrants["intmQuadrant1","quadrantDigit",] <- (2*floor(quadrants["globalQuadrant","latRemain",]*0.2))+floor(quadrants["globalQuadrant","lonRemain",]*0.2)+1 + quadrants["intmQuadrant1","latDigit",] <- floor(quadrants["globalQuadrant","latRemain",]) + quadrants["intmQuadrant1","lonDigit",] <- floor(quadrants["globalQuadrant","lonRemain",]) + quadrants["intmQuadrant1","latRemain",] <- round((quadrants["globalQuadrant","latRemain",]-quadrants["intmQuadrant1","latDigit",])*10,7) + 
quadrants["intmQuadrant1","lonRemain",] <- round((quadrants["globalQuadrant","lonRemain",]-quadrants["intmQuadrant1","lonDigit",])*10,7) + quadrants["intmQuadrant1","code",] <- quadrants["intmQuadrant1","quadrantDigit",]*100+quadrants["intmQuadrant1","latDigit",]*10+quadrants["intmQuadrant1","lonDigit",] + + quadrants["intmQuadrant2","quadrantDigit",] <- (2*floor(quadrants["intmQuadrant1","latRemain",]*0.2))+floor(quadrants["intmQuadrant1","lonRemain",]*0.2)+1 + quadrants["intmQuadrant2","latDigit",] <- floor(quadrants["intmQuadrant1","latRemain",]) + quadrants["intmQuadrant2","lonDigit",] <- floor(quadrants["intmQuadrant1","lonRemain",]) + quadrants["intmQuadrant2","latRemain",] <- round((quadrants["intmQuadrant1","latRemain",]-quadrants["intmQuadrant2","latDigit",])*10,7) + quadrants["intmQuadrant2","lonRemain",] <- round((quadrants["intmQuadrant1","lonRemain",]-quadrants["intmQuadrant2","lonDigit",])*10,7) + quadrants["intmQuadrant2","code",] <- quadrants["intmQuadrant2","quadrantDigit",]*100+quadrants["intmQuadrant2","latDigit",]*10+quadrants["intmQuadrant2","lonDigit",] + + quadrants["intmQuadrant3","quadrantDigit",] <- (2*floor(quadrants["intmQuadrant2","latRemain",]*0.2))+floor(quadrants["intmQuadrant2","lonRemain",]*0.2)+1 + quadrants["intmQuadrant3","latDigit",] <- floor(quadrants["intmQuadrant2","latRemain",]) + quadrants["intmQuadrant3","lonDigit",] <- floor(quadrants["intmQuadrant2","lonRemain",]) + quadrants["intmQuadrant3","latRemain",] <- round((quadrants["intmQuadrant2","latRemain",]-quadrants["intmQuadrant3","latDigit",])*10,7) + quadrants["intmQuadrant3","lonRemain",] <- round((quadrants["intmQuadrant2","lonRemain",]-quadrants["intmQuadrant3","lonDigit",])*10,7) + quadrants["intmQuadrant3","code",] <- quadrants["intmQuadrant3","quadrantDigit",]*100+quadrants["intmQuadrant3","latDigit",]*10+quadrants["intmQuadrant3","lonDigit",] + + if(degrees == 10) CSquareCodes <- quadrants["globalQuadrant","code",] + if(degrees == 5) CSquareCodes <- 
paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","quadrantDigit",],sep="") + if(degrees == 1) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],sep="") + if(degrees == 0.5) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","quadrantDigit",],sep="") + if(degrees == 0.1) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","code",],sep="") + if(degrees == 0.05) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","code",],":",quadrants["intmQuadrant3","quadrantDigit",],sep="") + if(degrees == 0.01) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","code",],":",quadrants["intmQuadrant3","code",],sep="") + +return(CSquareCodes)} diff --git a/vmstools/R/CSquare2LonLat.r b/vmstools/R/CSquare2LonLat.r index 1e5ef48..a2733a3 100644 --- a/vmstools/R/CSquare2LonLat.r +++ b/vmstools/R/CSquare2LonLat.r @@ -1,48 +1,72 @@ -CSquare2LonLat <- function(csqr,degrees){ - -ra <- 1e-6 #Artificial number to add for rounding of 5'ves (round(0.5) = 0, but in Excel (where conversion comes from, it is 1) -chars <- an(nchar(csqr)) -tensqd <- an(substr(csqr,1,1))+ra; gqlat <- (round(abs(tensqd-4)*2/10)*10/5)-1 -tenslatd <- an(substr(csqr,2,2))+ra; gqlon <- (2*round(tensqd/10)-1)*-1 -tenslond <- an(substr(csqr,3,4))+ra -unitsqd <- an(substr(csqr,6,6))+ra; iqulat <- round(unitsqd*2/10) -unitslatd <- an(substr(csqr,7,7))+ra; iqulon <- (round((unitsqd-1)/2,1) - floor((unitsqd-1)/2))*2 -unitslond <- an(substr(csqr,8,8))+ra -tenthsqd <- an(substr(csqr,10,10))+ra; iqtlat <- round(tenthsqd*2/10) -tenthslatd <- an(substr(csqr,11,11))+ra; iqtlon <- (round((tenthsqd-1)/2,1) - floor((tenthsqd-1)/2))*2 -tenthslond <- 
an(substr(csqr,12,12))+ra -hundqd <- an(substr(csqr,14,14))+ra; iqhlat <- round(hundqd*2/10) -hundlatd <- an(substr(csqr,15,15))+ra; iqhlon <- (round((hundqd-1)/2,1) - floor((hundqd-1)/2))*2 -hundlond <- an(substr(csqr,16,16))+ra -reso <- 10^(1-floor((chars-4)/4))-((round((chars-4)/4,1)-floor((chars-4)/4))*10^(1-floor((chars-4)/4))) - -if(degrees < reso[1]) stop("Returning degrees is smaller than format of C-square") -if(degrees == 10){ - lat <- ((tenslatd*10)+5)*gqlat-ra - lon <- ((tenslond*10)+5)*gqlon-ra -} -if(degrees == 5){ - lat <- ((tenslatd*10)+(iqulat*5)+2.5)*gqlat-ra - lon <- ((tenslond*10)+(iqulon*5)+2.5)*gqlon-ra -} -if(degrees == 1){ - lat <- ((tenslatd*10)+ unitslatd+0.5)*gqlat-ra - lon <- ((tenslond*10)+ unitslond+0.5)*gqlon-ra -} -if(degrees == 0.5){ - lat <- ((tenslatd*10)+ unitslatd + (iqtlat*0.5)+0.25)*gqlat-ra - lon <- ((tenslond*10)+ unitslond + (iqtlon*0.5)+0.25)*gqlon-ra -} -if(degrees == 0.1){ - lat <- ((tenslatd*10)+ unitslatd + (tenthslatd*0.1)+0.05)*gqlat-ra - lon <- ((tenslond*10)+ unitslond + (tenthslond*0.1)+0.05)*gqlon-ra -} -if(degrees == 0.05){ - lat <- ((tenslatd*10)+ unitslatd + (tenthslatd*0.1)+(iqhlat*0.05)+0.025)*gqlat-ra - lon <- ((tenslond*10)+ unitslond + (tenthslond*0.1)+(iqhlon*0.05)+0.025)*gqlon-ra -} -if(degrees == 0.01){ - lat <- ((tenslatd*10)+ unitslatd + (tenthslatd*0.1)+(hundlatd*0.01)+0.005)*gqlat-ra - lon <- ((tenslond*10)+ unitslond + (tenthslond*0.1)+(hundlond*0.01)+0.005)*gqlon-ra -} -return(data.frame(SI_LATI=lat,SI_LONG=lon))} +#' Calculate the GPS notation from the CSquare notation +#' +#' Convert the CSquare notation to GPS location where you can define what +#' resolution you want the GPS notation to be. +#' +#' +#' @param csqr CSquare notation like: 1400:134:120:356 +#' @param degrees Degrees resolution of returned GPS notation : 10, 5, 1, 0.5, +#' 0.1, 0.05, 0.01 +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{km2Degree}}, \code{\link{degree2Km}}, +#' \code{\link{lonLatRatio}}, \code{\link{distance}} +#' @references EU Lot 2 project, based on CSquare: +#' http://www.marine.csiro.au/csquares/ +#' @examples +#' +#' data(tacsat) +#' tacsat$CSquare <- CSquare(tacsat$SI_LONG,tacsat$SI_LATI,0.05) +#' tacsat$SI_LONG2 <- CSquare2LonLat(tacsat$CSquare,0.5)$SI_LONG +#' tacsat$SI_LATI2 <- CSquare2LonLat(tacsat$CSquare,0.5)$SI_LATI +#' tacsat$CSquare2 <- CSquare(tacsat$SI_LONG2,tacsat$SI_LATI2,1) +#' head(tacsat[which(tacsat$CSquare != tacsat$CSquare2),]) +#' +#' @export CSquare2LonLat +CSquare2LonLat <- function(csqr,degrees){ + +ra <- 1e-6 #Artificial number to add for rounding of 5'ves (round(0.5) = 0, but in Excel (where conversion comes from, it is 1) +chars <- an(nchar(csqr)) +tensqd <- an(substr(csqr,1,1))+ra; gqlat <- (round(abs(tensqd-4)*2/10)*10/5)-1 +tenslatd <- an(substr(csqr,2,2))+ra; gqlon <- (2*round(tensqd/10)-1)*-1 +tenslond <- an(substr(csqr,3,4))+ra +unitsqd <- an(substr(csqr,6,6))+ra; iqulat <- round(unitsqd*2/10) +unitslatd <- an(substr(csqr,7,7))+ra; iqulon <- (round((unitsqd-1)/2,1) - floor((unitsqd-1)/2))*2 +unitslond <- an(substr(csqr,8,8))+ra +tenthsqd <- an(substr(csqr,10,10))+ra; iqtlat <- round(tenthsqd*2/10) +tenthslatd <- an(substr(csqr,11,11))+ra; iqtlon <- (round((tenthsqd-1)/2,1) - floor((tenthsqd-1)/2))*2 +tenthslond <- an(substr(csqr,12,12))+ra +hundqd <- an(substr(csqr,14,14))+ra; iqhlat <- round(hundqd*2/10) +hundlatd <- an(substr(csqr,15,15))+ra; iqhlon <- (round((hundqd-1)/2,1) - floor((hundqd-1)/2))*2 +hundlond <- an(substr(csqr,16,16))+ra +reso <- 10^(1-floor((chars-4)/4))-((round((chars-4)/4,1)-floor((chars-4)/4))*10^(1-floor((chars-4)/4))) + +if(degrees < reso[1]) stop("Returning degrees is smaller than format of C-square") +if(degrees == 10){ + lat <- ((tenslatd*10)+5)*gqlat-ra + lon <- ((tenslond*10)+5)*gqlon-ra +} +if(degrees == 5){ + lat <- ((tenslatd*10)+(iqulat*5)+2.5)*gqlat-ra + lon <- 
((tenslond*10)+(iqulon*5)+2.5)*gqlon-ra +} +if(degrees == 1){ + lat <- ((tenslatd*10)+ unitslatd+0.5)*gqlat-ra + lon <- ((tenslond*10)+ unitslond+0.5)*gqlon-ra +} +if(degrees == 0.5){ + lat <- ((tenslatd*10)+ unitslatd + (iqtlat*0.5)+0.25)*gqlat-ra + lon <- ((tenslond*10)+ unitslond + (iqtlon*0.5)+0.25)*gqlon-ra +} +if(degrees == 0.1){ + lat <- ((tenslatd*10)+ unitslatd + (tenthslatd*0.1)+0.05)*gqlat-ra + lon <- ((tenslond*10)+ unitslond + (tenthslond*0.1)+0.05)*gqlon-ra +} +if(degrees == 0.05){ + lat <- ((tenslatd*10)+ unitslatd + (tenthslatd*0.1)+(iqhlat*0.05)+0.025)*gqlat-ra + lon <- ((tenslond*10)+ unitslond + (tenthslond*0.1)+(iqhlon*0.05)+0.025)*gqlon-ra +} +if(degrees == 0.01){ + lat <- ((tenslatd*10)+ unitslatd + (tenthslatd*0.1)+(hundlatd*0.01)+0.005)*gqlat-ra + lon <- ((tenslond*10)+ unitslond + (tenthslond*0.1)+(hundlond*0.01)+0.005)*gqlon-ra +} +return(data.frame(SI_LATI=lat,SI_LONG=lon))} diff --git a/vmstools/R/ICESarea.r b/vmstools/R/ICESarea.r index 72a153d..44f8813 100644 --- a/vmstools/R/ICESarea.r +++ b/vmstools/R/ICESarea.r @@ -1,3 +1,27 @@ +#' Get ICES area from coordinates +#' +#' Get the ICES area from any lon,lat position, given that this position is +#' within the ICES region. +#' +#' +#' @param tacsat dataframe given that they have 'SI_LONG' and 'SI_LATI' columns +#' (either tacsat format or other dataset with SI_LONG and SI_LATI columns) +#' @param areas ICES areas as SpatialPolygons +#' @param proj4string Projection string, default to NULL. +#' @param fast If memory allocation is not a problem, a faster version can be +#' switched on +#' @return Returns the areas as a vector +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{ICESrectangle}}, \code{\link{ICESrectangle2LonLat}} +#' @references EU Lot 2 project +#' @examples +#' +#' data(ICESareas) +#' res <- data.frame(SI_LONG = c(1,2,2,4,2), +#' SI_LATI = c(53,53.2,54,56.7,55.2)) +#' areas <- ICESarea(res,ICESareas) +#' +#' @export ICESarea ICESarea <- function(tacsat,areas,proj4string=NULL,fast=FALSE){ require(sp) if(!class(areas) %in% c("SpatialPolygons","SpatialPolygonsDataFrame")) stop("'areas' must be specified as class 'SpatialPolygons' or 'SpatialPolygonsDataFrame'") diff --git a/vmstools/R/ICESrectangle.r b/vmstools/R/ICESrectangle.r index 82e1b02..89dd2f7 100644 --- a/vmstools/R/ICESrectangle.r +++ b/vmstools/R/ICESrectangle.r @@ -1,23 +1,42 @@ -ICESrectangle <- function(dF){ - rectChar1n2 <- sprintf("%02i",as.integer(2 * (dF[, "SI_LATI"] - 35.5))) - rectChar3 <- ifelse(dF[, "SI_LONG"] > -50 & dF[, "SI_LONG"]<= -40, "A", - ifelse(dF[, "SI_LONG"] > -40 & dF[, "SI_LONG"]<= -30, "B", - ifelse(dF[, "SI_LONG"] > -30 & dF[, "SI_LONG"]<= -20, "C", - ifelse(dF[, "SI_LONG"] > -20 & dF[, "SI_LONG"]<= -10, "D", - ifelse(dF[, "SI_LONG"] > -10 & dF[, "SI_LONG"]< 0, "E", - #-Note that at 0 meridian the allocation of points at the meridian switch - ifelse(dF[, "SI_LONG"] >= 0 & dF[, "SI_LONG"]< 10, "F", - ifelse(dF[, "SI_LONG"] >= 10 & dF[, "SI_LONG"]< 20, "G", - ifelse(dF[, "SI_LONG"] >= 20 & dF[, "SI_LONG"]< 30, "H", "J")))))))) - rectChar4 <- rep(NA,nrow(dF)) - idxlowzero <- which(dF[,"SI_LONG"] < 0) - idxabozero <- which(dF[,"SI_LONG"] >= 0) - if(length(idxlowzero)>0) rectChar4[idxlowzero] <- ceiling(dF[idxlowzero,"SI_LONG"] %% 10 -1 + 10)%%10 - if(length(idxabozero)>0) rectChar4[idxabozero] <- floor(dF[idxabozero,"SI_LONG"] %% 10) - rectID <- paste(rectChar1n2, rectChar3, rectChar4, sep = "") - return(rectID)} - - -#dF <- as.data.frame(cbind(SI_LONG = seq(-20,-9,0.5),SI_LATI = rep(54,length(seq(-20,-9,0.5))))) -#dF <- as.data.frame(cbind(SI_LONG = seq(0,12,0.5),SI_LATI = rep(54,length(seq(0,12,0.5))))) 
-#dF$ICES <- ICESrectangle(dF) \ No newline at end of file +#' Get ICES rectangle from coordinates +#' +#' Get the ICES rectangle from any lon,lat position, given that this position +#' is within the ICES region. +#' +#' +#' @param dF dataframe given that they have 'SI_LONG' and 'SI_LATI' columns +#' @return Returns the rectangles as a vector +#' @author Neil Campbell +#' @seealso \code{\link{ICESarea}}, \code{\link{ICESrectangle2LonLat}} +#' @references EU Lot 2 project +#' @examples +#' +#' res <- data.frame(SI_LONG = c(1,2,2,4,2), +#' SI_LATI = c(53,53.2,54,56.7,55.2)) +#' ICESrectangle(res) +#' +#' +#' @export ICESrectangle +ICESrectangle <- function(dF){ + rectChar1n2 <- sprintf("%02i",as.integer(2 * (dF[, "SI_LATI"] - 35.5))) + rectChar3 <- ifelse(dF[, "SI_LONG"] > -50 & dF[, "SI_LONG"]<= -40, "A", + ifelse(dF[, "SI_LONG"] > -40 & dF[, "SI_LONG"]<= -30, "B", + ifelse(dF[, "SI_LONG"] > -30 & dF[, "SI_LONG"]<= -20, "C", + ifelse(dF[, "SI_LONG"] > -20 & dF[, "SI_LONG"]<= -10, "D", + ifelse(dF[, "SI_LONG"] > -10 & dF[, "SI_LONG"]< 0, "E", + #-Note that at 0 meridian the allocation of points at the meridian switch + ifelse(dF[, "SI_LONG"] >= 0 & dF[, "SI_LONG"]< 10, "F", + ifelse(dF[, "SI_LONG"] >= 10 & dF[, "SI_LONG"]< 20, "G", + ifelse(dF[, "SI_LONG"] >= 20 & dF[, "SI_LONG"]< 30, "H", "J")))))))) + rectChar4 <- rep(NA,nrow(dF)) + idxlowzero <- which(dF[,"SI_LONG"] < 0) + idxabozero <- which(dF[,"SI_LONG"] >= 0) + if(length(idxlowzero)>0) rectChar4[idxlowzero] <- ceiling(dF[idxlowzero,"SI_LONG"] %% 10 -1 + 10)%%10 + if(length(idxabozero)>0) rectChar4[idxabozero] <- floor(dF[idxabozero,"SI_LONG"] %% 10) + rectID <- paste(rectChar1n2, rectChar3, rectChar4, sep = "") + return(rectID)} + + +#dF <- as.data.frame(cbind(SI_LONG = seq(-20,-9,0.5),SI_LATI = rep(54,length(seq(-20,-9,0.5))))) +#dF <- as.data.frame(cbind(SI_LONG = seq(0,12,0.5),SI_LATI = rep(54,length(seq(0,12,0.5))))) +#dF$ICES <- ICESrectangle(dF) diff --git a/vmstools/R/ICESrectangle2LonLat.r 
b/vmstools/R/ICESrectangle2LonLat.r index 3bc41ff..24a264f 100644 --- a/vmstools/R/ICESrectangle2LonLat.r +++ b/vmstools/R/ICESrectangle2LonLat.r @@ -1,47 +1,70 @@ -ICESrectangle2LonLat <- -function (statsq,midpoint=FALSE) -{ - #Split the code into its respective parts - latpart <- substr(statsq, 1, 2) - lonpart <- substr(statsq, 3, 4) - - #Handle the latitude first, as its easier - #The grid increments continuously from - #south to north in 0.5 degree intervals - latlabels <- sprintf("%02i",1:99) - lat.mids <- seq(36, 85, 0.5) + 0.25 - lat.idx <- match(latpart, latlabels) - lat <- lat.mids[lat.idx] - - #The longtitudinal structure is not so easy - #There are three main traps: - # - A4-A9 do not exist - # - There are no I squares - # - Grid ends at M8 - lonlabels <- paste(rep(LETTERS[c(2:8,10:13)], each=10), rep(0:9, - 7), sep = "") - lonlabels <- c("A0","A1","A2","A3",lonlabels[-length(lonlabels)]) - lon.mids <- -44:68 + 0.5 - lon.idx <- match(lonpart, lonlabels) - lon <- lon.mids[lon.idx] - - #Check whether it worked - #If any part of the code is not recognised, both - #lon and lat should be NA - failed.codes <- is.na(lat) | is.na(lon) - if (any(failed.codes)) { - warning("Some stat squares are not valid. Please check the help files for ICESrectangle2LonLat() for more information about the formal definition of valid ICES rectangles.") - lat[failed.codes] <- NA - lon[failed.codes] <- NA - } - - #Correct for midpoints - if(midpoint == FALSE){ - - lat <- lat - 0.25 - lon <- lon - 0.5 - - } - #Done - return(data.frame(SI_LATI=lat,SI_LONG=lon)) -} +#' Convert the ICES rectangle to coordinates +#' +#' Convert the ICES rectangle to the longitude and latitude position of that +#' specific rectangle, with either midpoint or lower-left corner. 
+#' +#' +#' @param statsq character vector of ICES rectangles +#' @param midpoint logical statement if returned values need to be midpoints of +#' rectangle or not (default) +#' @return Returns dataframe with longitude and latitude columns +#' @author Neil Campbell, Mark Payne +#' @seealso \code{\link{ICESarea}}, \code{\link{ICESrectangle}} +#' @references EU Lot 2 project +#' @examples +#' +#' res <- data.frame(SI_LONG = c(1,2,2,4,2), +#' SI_LATI = c(53,53.2,54,56.7,55.2)) +#' rects <- ICESrectangle(res) +#' midpoints <- ICESrectangle2LonLat(rects,midpoint=TRUE) +#' corners <- ICESrectangle2LonLat(rects,midpoint=FALSE) +#' +#' +#' @export ICESrectangle2LonLat +ICESrectangle2LonLat <- +function (statsq,midpoint=FALSE) +{ + #Split the code into its respective parts + latpart <- substr(statsq, 1, 2) + lonpart <- substr(statsq, 3, 4) + + #Handle the latitude first, as its easier + #The grid increments continuously from + #south to north in 0.5 degree intervals + latlabels <- sprintf("%02i",1:99) + lat.mids <- seq(36, 85, 0.5) + 0.25 + lat.idx <- match(latpart, latlabels) + lat <- lat.mids[lat.idx] + + #The longtitudinal structure is not so easy + #There are three main traps: + # - A4-A9 do not exist + # - There are no I squares + # - Grid ends at M8 + lonlabels <- paste(rep(LETTERS[c(2:8,10:13)], each=10), rep(0:9, + 7), sep = "") + lonlabels <- c("A0","A1","A2","A3",lonlabels[-length(lonlabels)]) + lon.mids <- -44:68 + 0.5 + lon.idx <- match(lonpart, lonlabels) + lon <- lon.mids[lon.idx] + + #Check whether it worked + #If any part of the code is not recognised, both + #lon and lat should be NA + failed.codes <- is.na(lat) | is.na(lon) + if (any(failed.codes)) { + warning("Some stat squares are not valid. 
Please check the help files for ICESrectangle2LonLat() for more information about the formal definition of valid ICES rectangles.") + lat[failed.codes] <- NA + lon[failed.codes] <- NA + } + + #Correct for midpoints + if(midpoint == FALSE){ + + lat <- lat - 0.25 + lon <- lon - 0.5 + + } + #Done + return(data.frame(SI_LATI=lat,SI_LONG=lon)) +} diff --git a/vmstools/R/N1p0.R b/vmstools/R/N1p0.R index 04650b2..9126155 100644 --- a/vmstools/R/N1p0.R +++ b/vmstools/R/N1p0.R @@ -1,6 +1,35 @@ -`N1p0` <- -function(x,mu,sig,p){ - aa<- c((1/(sig*sqrt(2*pi)))*exp(-((x-(mu+p))^2)/(2*sig^2))) - return(aa) - } - +#' 2-dimensional Gaussian distribution with two-peak possibility +#' +#' Computes the 2-dimensional Gaussian distribution depending on the distance +#' from the mean, and standard deviation with adjustable mean and shift from +#' mean into two peaks instead of 1 +#' +#' +#' @param x Observed value, deviance from mean +#' @param mu Mean value +#' @param sig Standard deviation of value +#' @param p Shift from mean into two peaks +#' @note Function is called inside plotCIinterpolation() +#' @author Niels T. Hintzen +#' @references EU lot 2 project +#' @examples +#' +#' +#' x <- matrix(c(5,4,3,4,5,4,3,2,3,4,3,2,1,2,3, +#' 3,2,1,2,3,4,3,2,3,4,5,4,3,4,5),nrow=6,ncol=5,byrow=TRUE) +#' mu <- 0 +#' sig <- x^0.5 +#' p <- 0 +#' +#' res <- matrix(N1p0(x,mu,sig,p),nrow=6,ncol=5) +#' +#' #Plot the results +#' image(res) +#' +#' @export N1p0 +`N1p0` <- +function(x,mu,sig,p){ + aa<- c((1/(sig*sqrt(2*pi)))*exp(-((x-(mu+p))^2)/(2*sig^2))) + return(aa) + } + diff --git a/vmstools/R/ac.R b/vmstools/R/ac.R index 2d2063c..b5a59d5 100644 --- a/vmstools/R/ac.R +++ b/vmstools/R/ac.R @@ -1,4 +1,20 @@ -`ac` <- -function(x){return(as.character(x))} - - # Hello world ! 
\ No newline at end of file +#' shortcut for as.character +#' +#' Change the class of an object to character +#' +#' +#' @param x object to turn into character +#' @return as.character attempts to coerce its argument to character type +#' @author Niels T. Hintzen +#' @seealso \code{\link{as.character}} +#' @references EU Lot 2 project +#' @examples +#' +#' as.character(5) #returns the number 5 as class 'character' +#' ac(5) #returns the number 5 also as class 'character' +#' +#' @export ac +`ac` <- +function(x){return(as.character(x))} + + # Hello world ! diff --git a/vmstools/R/activityTacsat.r b/vmstools/R/activityTacsat.r index 8d5a8a8..c413aa0 100644 --- a/vmstools/R/activityTacsat.r +++ b/vmstools/R/activityTacsat.r @@ -1,421 +1,488 @@ -activityTacsat <- function(tacsat,units="year",analyse.by="LE_GEAR",storeScheme=NULL,plot=FALSE,level="all"){ - - require("mixtools") - if (!"SI_DATIM" %in% colnames(tacsat)) - tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, - sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") - if(!analyse.by %in% c("LE_GEAR","VE_REF")) stop("Analysing only by gear or vessel") - - #- Make subset for only those tacsat records that have speed - tacsat$ID <- 1:nrow(tacsat) - tacsat$SI_STATE <- NA - tacsatOrig <- tacsat - idx <- which(is.na(tacsat$SI_SP)==FALSE) - tacsat <- tacsat[idx,] - - #- If sigma is NULL it needs to be estimated and gets a variable name - storeScheme$sigma0[which(storeScheme$sigma0==0)] <- "d" - - if(units == "all"){ yrs <- 0; mths <- 0; wks <- 0} - if(units == "year"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 0; wks <- 0} - if(units == "month"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- an(sort(unique(format(tacsat$SI_DATIM,"%m")))); wks <- 0} - if(units == "week"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); wks <- an(sort(unique(format(tacsat$SI_DATIM,"%W"))))+1; mths <- 0} - - runScheme <- expand.grid(years=yrs,months=mths,weeks=wks) - - 
#----------------------------------------------------------------------------- - # Start run for all combinations of gear / vessel and units - #----------------------------------------------------------------------------- - - for(iRun in 1:nrow(runScheme)){ - yr <- runScheme[iRun,"years"] - mth <- runScheme[iRun,"months"] - wk <- runScheme[iRun,"weeks"] - if(nrow(runScheme)==1){ sTacsat <- tacsat - } else { - if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr) - if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & (an(format( tacsat$SI_DATIM,"%W"))+1) == wk) - if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & format(tacsat$SI_DATIM,"%m") == mth) - } - if(plot==TRUE) x11(); - #--------------------------------------------------------------------------- - #- Analyses when gear info is supplied LE_GEAR - #--------------------------------------------------------------------------- - if("LE_GEAR" %in% colnames(tacsat) & analyse.by == "LE_GEAR"){ - - gearList <- names(which((rowSums(table(sTacsat$LE_GEAR,sTacsat$SI_SP)) - table(sTacsat$LE_GEAR,sTacsat$SI_SP)[,"0"])>40)) - - #- Mirror the tacsat dataset and make a selection - tyg <- subset(sTacsat,is.na(LE_GEAR) == FALSE & LE_GEAR %in% gearList); tygmr <- tyg; tygmr$SI_SP <- -1* tygmr$SI_SP; tygmr <- rbind(tyg,tygmr) - tng <- subset(sTacsat,is.na(LE_GEAR) == TRUE | !LE_GEAR %in% gearList); tngmr <- tng; tngmr$SI_SP <- -1* tngmr$SI_SP; tngmr <- rbind(tng,tngmr) - - #------------------------------------------------------------------------- - #- Get general speed pattern by gear, use analysed number of kernals LE_GEAR + GENERIC - #------------------------------------------------------------------------- - res <- list() - for(iGr in unique(tyg$LE_GEAR)){ - - #- Get rid of very influential datapoints (lower their abundance) - tbl <- table(subset(tygmr,LE_GEAR==iGr)$SI_SP); - spd <- an(names(rev(sort(tbl))[1])) - idx <- 
which(subset(tygmr,LE_GEAR==iGr)$SI_SP==spd) - nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) - if(tbl[ac(spd)]/tbl[nxt] > 5){ - idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) - if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tygmr,LE_GEAR==iGr)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) - } else { idx <- -1:-nrow(subset(tygmr,LE_GEAR==iGr))} - - #----------------------------------------------------------------------------- - # Fit the 3 or 5 normal distributions. If parameter guestimates are - # available, then use these - #----------------------------------------------------------------------------- - if(is.null(storeScheme)==TRUE){ - res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=5,maxrestarts=20,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",0.911,"a","b"),sigma=rep(1,5))) - } else { - #- Fitting model when mean values of peaks has been defined - if("means" %in% colnames(storeScheme)){ - - #- Extract parameters from storeScheme - ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"means"] - sigma <- anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"sigma0"]) - fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"fixPeaks"]) - - #- Setup parameter estimate vectors for mu and sigma - if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintmn <- c("-a",0,"a") } else { constraintmn <- c("-b","-a",0,"a","b")} - if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintsd <- c("a","b","a")} else { constraintsd <- c("b","a",sigma,"a","b")} - 
if(fixPeaks) constraintmn <- c(na.omit(anf(unlist(strsplit(ss," "))))) - - #- Fit the actual model through the normalmixEM function - res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,mu=c(na.omit(as.numeric(strsplit(ss," ")[[1]]))), sigma=rep(1,length(constraintsd)), - maxrestarts=20,mean.constr=constraintmn,sd.constr=constraintsd)) - } else { - #- Fitting model when number of peaks has been defined - - #- Extract parameters from storeScheme - ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"peaks"] - sigma <- anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"sigma0"]) - fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"fixPeaks"]) - - #- Setup parameter estimate vectors for mu and sigma - if(ss==3){ constraintmn <- c("-a",0,"a") } else { constraintmn <- c("-b","-a",0,"a","b")} - if(ss==3){ constraintsd <- c("a","b","a")} else { constraintsd <- c("b","a",sigma,"a","b")} - if(length(ss)>0){ - - #- Fit the actual model through the normalmixEM function - if(is.na(ss)==TRUE) res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=5, maxrestarts=20, mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5))) - if(is.na(ss)==FALSE) res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=ss,maxrestarts=20, mean.constr=constraintmn, sd.constr=constraintsd, sigma=rep(1,length(constraintsd)))) - } else { res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=5, maxrestarts=20, mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5)))} - } - } - if(plot==TRUE) plot(res[[iGr]],2,breaks=100,xlim=c(-20,20)) - } - if(level == "vessel"){ - #- 
Transform the output into the right format - for(iGr in unique(tyg$LE_GEAR)) - if(!class(res[[iGr]]) == "try-error"){ res[[iGr]] <- res[[iGr]]$mu } else { res[[iGr]] <- rep(NA,5)} - res <- lapply(res,function(x){if(class(x)=="try-error"){x<-rep(NA,5)}else{x}}) - res <- lapply(res,sort) - } - - if(level == "vessel"){ - #------------------------------------------------------------------------- - #- Perform analyses per vessel with gear LE_GEAR + VE_REF - #------------------------------------------------------------------------- - - if(nrow(tygmr)>40) - shipList <- names(which((rowSums(table(tygmr$VE_REF,tygmr$SI_SP)) - table(tygmr$VE_REF,tygmr$SI_SP)[,"0"])>20)) - shipFit <- list() - if(exists("shipList")){ - for(iShip in shipList){ - - #- Get rid of very influential datapoints (lower their abundance) - tbl <- table(subset(tygmr,VE_REF==iShip)$SI_SP); - spd <- an(names(rev(sort(tbl))[1])) - idx <- which(subset(tygmr,VE_REF==iShip)$SI_SP==spd) - nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) - if(tbl[ac(spd)]/tbl[nxt] > 5){ - idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) - if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tygmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) - } else { idx <- -1:-nrow(subset(tygmr,VE_REF==iShip))} - - shipTacsat <- subset(tygmr,VE_REF == iShip) - - #----------------------------------------------------------------------------- - # Fit the 3 or 5 normal distributions. 
If parameter guestimates are - # available, then use these - #----------------------------------------------------------------------------- - - #- Setup parameter estimate vectors for mu and sigma - if(length(res[[names(which.max(table(shipTacsat$LE_GEAR)))]])==3){ constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} - if(length(res[[names(which.max(table(shipTacsat$LE_GEAR)))]])==3){ constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",0.911,"a","b")} - - #- Fit the actual model through the normalmixEM function - shipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],mu=res[[names(which.max(table(shipTacsat$LE_GEAR)))]],maxit=2000, - sigma=rep(1,length(constraintsd)),mean.constr=constraintmn,sd.constr=constraintsd)) - - if(class(shipFit[[iShip]])!= "try-error"){ - - #- Analyse the fit and turn it into a result of fishing - no fishing - #mu <- sort.int(shipFit[[iShip]]$mu,index.return=TRUE) - #sds <- shipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x - mu <- shipFit[[iShip]]$mu - sds <- shipFit[[iShip]]$sigma - - probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) - for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) - SI_STATE <- apply(probs,1,which.max) - - if(length(mu)==3){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} - if(length(mu)==5){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} - tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] - } else { tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- NA} - } - } - } else { - for(iGr in unique(tyg$LE_GEAR)){ - if(!class(res[[iGr]]) == "try-error"){ - - #- Analyse the fit and turn it into a result of fishing - no fishing - #mu <- sort.int(res[[iGr]]$mu,index.return=TRUE) - #sds <- res[[iGr]]$sigma[mu$ix]; mu <- mu$x - mu <- res[[iGr]]$mu - sds <- 
res[[iGr]]$sigma - probs <- dnorm(x=subset(tyg,LE_GEAR==iGr)$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) - for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=subset(tyg,LE_GEAR==iGr)$SI_SP,mean=mu[i],sd=sds[i])) - SI_STATE <- apply(probs,1,which.max) - - if(length(mu)==3){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} - if(length(mu)==5){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} - tacsat$SI_STATE[which(tacsat$ID %in% subset(tyg,LE_GEAR == iGr)$ID)] <- SI_STATE - } - } - } - #------------------------------------------------------------------------- - #- Perform analyses per vessel without gear NO_GEAR + VE_REF - #------------------------------------------------------------------------- - if(nrow(tngmr)>40) - nonshipList <- names(which((rowSums(table(tngmr$VE_REF,tngmr$SI_SP)) - table(tngmr$VE_REF,tngmr$SI_SP)[,"0"])>20)) - nonshipFit <- list() - if(exists("nonshipList")){ - for(iShip in nonshipList){ - - #- Get rid of very influential datapoints (lower their abundance) - tbl <- table(subset(tngmr,VE_REF==iShip)$SI_SP); - spd <- an(names(rev(sort(tbl))[1])) - idx <- which(subset(tngmr,VE_REF==iShip)$SI_SP==spd) - nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) - if(tbl[ac(spd)]/tbl[nxt] > 5){ - idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) - if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tngmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) - } else { idx <- -1:-nrow(subset(tngmr,VE_REF==iShip))} - - #----------------------------------------------------------------------------- - # Fit the 3 or 5 normal distributions. 
If parameter guestimates are - # available, then use these - #----------------------------------------------------------------------------- - - shipTacsat <- subset(tngmr,VE_REF == iShip) - if(exists("shipFit")){ - if(iShip %in% names(shipFit)){ - - #- Setup parameter estimate vectors for mu and sigma - if(length(shipFit[[iShip]]$mu)==3){constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} - if(length(shipFit[[iShip]]$mu)==3){constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",0.911,"a","b")} - - #- Fit the actual model through the normalmixEM function - nonshipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],k=length(shipFit[[iShip]]$mu),maxit=2000, - sigma=rep(1,length(constraintsd)),mean.constr=constraintmn,sd.constr=constraintsd)) - } else { - #- Fit the actual model through the normalmixEM function - nonshipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],k=5,maxit=2000,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",0.911,"a","b"),sigma=rep(1,5)))} - - if(!class(nonshipFit[[iShip]]) == "try-error"){ - - #- Analyse the fit and turn it into a result of fishing - no fishing - #mu <- sort.int(nonshipFit[[iShip]]$mu,index.return=TRUE) - #sds <- nonshipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x - mu <- nonshipFit[[iShip]]$mu - sds <- nonshipFit[[iShip]]$sds - - probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) - for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) - SI_STATE <- apply(probs,1,which.max) - - if(length(mu)==3){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} - if(length(mu)==5){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} - tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] - } - } - } - } - } - #--------------------------------------------------------------------------- - 
#- Analyses when vessel info is supplied VE_REF + ENOUGH # - #--------------------------------------------------------------------------- - if("VE_REF" %in% colnames(tacsat) & analyse.by == "VE_REF"){ - - vesselList <- names(which((rowSums(table(sTacsat$VE_REF,sTacsat$SI_SP)) - table(sTacsat$VE_REF,sTacsat$SI_SP)[,"0"])>40)) - - #- Mirror the tacsat dataset and make a selection - tyv <- subset(sTacsat,is.na(VE_REF) == FALSE & VE_REF %in% vesselList); tyvmr <- tyv; tyvmr$SI_SP <- -1* tyvmr$SI_SP; tyvmr <- rbind(tyv,tyvmr) - tnv <- subset(sTacsat,is.na(VE_REF) == TRUE | !VE_REF %in% vesselList); tnvmr <- tnv; tnvmr$SI_SP <- -1* tnvmr$SI_SP; tnvmr <- rbind(tnv,tnvmr) - - #- Perform analyses per vessel - if(nrow(tyv)>40) - shipList <- names(which((rowSums(table(tyvmr$VE_REF,tyvmr$SI_SP)) - table(tyvmr$VE_REF,tyvmr$SI_SP)[,"0"])>20)) - shipFit <- list() - if(exists("shipList")){ - for(iShip in shipList){ - - #- Get rid of very influential data points - tbl <- table(subset(tyvmr,VE_REF==iShip)$SI_SP); - spd <- an(names(rev(sort(tbl))[1])) - idx <- which(subset(tyvmr,VE_REF==iShip)$SI_SP==spd) - nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) - if(tbl[ac(spd)]/tbl[nxt] > 5){ - idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) - if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tyvmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) - } else { idx <- -1:-nrow(subset(tyvmr,VE_REF==iShip))} - - shipTacsat <- subset(tyvmr,VE_REF == iShip) - - #----------------------------------------------------------------------------- - # Fit the 3 or 5 normal distributions. 
If parameter guestimates are - # available, then use these - #----------------------------------------------------------------------------- - if(is.null(storeScheme)==TRUE){ - shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=5,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",0.911,"a","b"),sigma=rep(1,5))) - } else { - #- Fitting model when mean values of peaks has been defined - if("means" %in% colnames(storeScheme)){ - - #- Extract parameters from storeScheme - ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"means"] - sigma <- anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"sigma0"]) - fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"fixPeaks"]) - - #- Setup parameter estimate vectors for mu and sigma - if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} - if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",sigma,"a","b")} - if(fixPeaks) constraintmn <- c(na.omit(anf(unlist(strsplit(ss," "))))) - - #- Fit the actual model through the normalmixEM function - shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],mu=c(na.omit(as.numeric(strsplit(ss," ")[[1]]))), maxit=2000, - mean.constr=constraintmn,sd.constr=constraintsd,sigma=rep(1,length(constraintsd)))) - } else { - #- Fitting model when number of peaks has been defined - - #- Extract parameters from storeScheme - ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"peaks"] - sigma <- anf(storeScheme[which(storeScheme$years 
== yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"sigma0"]) - fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"fixPeaks"]) - - #- Setup parameter estimate vectors for mu and sigma - if(ss==3){ constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} - if(ss==3){ constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",sigma,"a","b")} - if(length(ss)>0){ - - #- Fit the actual model through the normalmixEM function - if(is.na(ss)==TRUE) shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=5,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5))) - if(is.na(ss)==FALSE) shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=ss,mean.constr=constraintmn,sd.constr=constraintsd,sigma=rep(1,length(constraintsd)))) - } else { shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=5,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5)))} - } - } - if(plot==TRUE) plot(shipFit[[iShip]],2,breaks=100,xlim=c(-20,20)) - if(!class(shipFit[[iShip]]) == "try-error"){ - - #- Analyse the fit and turn it into a result of fishing - no fishing - #mu <- sort.int(shipFit[[iShip]]$mu,index.return=TRUE) - #sds <- shipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x - mu <- shipFit[[iShip]]$mu - sds <- shipFit[[iShip]]$sigma - - probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) - for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) - SI_STATE <- apply(probs,1,which.max) - - if(length(mu)==3){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} - if(length(mu)==5){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); 
SI_STATE <- ac(SI_STATE)} - tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] - } - } - } - #------------------------------------------------------------------------- - #- Perform analyses for all vessels left over VE_REF + NOT ENOUGH # - #------------------------------------------------------------------------- - if(nrow(tnvmr)>40) - nonshipList <- names(which((rowSums(table(tnvmr$VE_REF,tnvmr$SI_SP)) - table(tnvmr$VE_REF,tnvmr$SI_SP)[,"0"])>20)) - nonshipFit <- list() - if(exists("nonshipList")){ - for(iShip in nonshipList){ - - #- Get rid of very influential datapoints (lower their abundance) - tbl <- table(subset(tnvmr,VE_REF==iShip)$SI_SP); - spd <- an(names(rev(sort(tbl))[1])) - idx <- which(subset(tnvmr,VE_REF==iShip)$SI_SP==spd) - nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) - if(tbl[ac(spd)]/tbl[nxt] > 5){ - idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) - if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tnvmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) - } else { idx <- -1:-nrow(subset(tnvmr,VE_REF==iShip))} - - shipTacsat <- subset(tnvmr,VE_REF == iShip) - #----------------------------------------------------------------------------- - # Fit the 3 or 5 normal distributions. 
If parameter guestimates are - # available, then use these - #----------------------------------------------------------------------------- - if(length(shipFit[[iShip]]$mu)==3){constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} - if(length(shipFit[[iShip]]$mu)==3){constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",0.911,"a","b")} - - #- Fit the actual model through the normalmixEM function - nonshipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],k=length(shipFit[[iShip]]$mu),maxit=2000,mean.constr=constraintmn,sd.constr=constraintsd,sigma=rep(1,length(constraintsd)))) - - if(!class(nonshipFit[[iShip]]) == "try-error"){ - - #- Analyse the fit and turn it into a result of fishing - no fishing - #mu <- sort.int(nonshipFit[[iShip]]$mu,index.return=TRUE) - #sds <- nonshipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x - mu <- nonshipFit[[iShip]]$mu - sds <- nonshipFit[[iShip]]$sigma - - probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) - for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) - SI_STATE <- apply(probs,1,which.max) - - if(length(mu)==3) - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE) - if(length(mu)==5) - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE) - tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] - } - } - } - } - } - -leftOverTacsat <- tacsatOrig[which(!tacsatOrig$ID %in% tacsat$ID),] -tacsat <- rbind(tacsat,leftOverTacsat) -tacsat <- orderBy(~ID,tacsat) -cat("Note that in case of 5 peaks: no fishing = h, fishing = f, steaming / no fishing = s\n") -cat("Note that in case of 3 peaks: fishing = f, steaming / no fishing = s\n") -return(tacsat$SI_STATE)} \ No newline at end of file +#' Define activity of fishing vessel based on speed profile analyses. 
+#' +#' Analyse tacsat data by gear or vessel and define, based on speed profile and +#' fitting normal distributions through these speed profiles, fishing and other +#' activities. +#' +#' The analysis assumes that the speed profile close to 0 can be mirrored to +#' create a good normal distribution. Success of fit might depend on the +#' initial guess in peaks or mean peak values. +#' +#' Note that the default value of sigma0 = 0.911 (used when prior estimates of +#' parameters are not given). The value of 0.911 corresponds to a width of +#' 1.5knots on each side of the mean under a 90 percent CI. +#' +#' @param tacsat tacsat dataset +#' @param units Analyse by: "year", "month" and "week". "month" and "week" +#' cannot be used at same time. +#' @param analyse.by Analyse tacsat by gear ("LE_GEAR") or vessel ("VE_REF"). +#' @param storeScheme If \code{\link{activityTacsatAnalyse}} is used, supply +#' output here. +#' @param plot Logical. Whether the results of the fit of the normal +#' distributions should be plotted +#' @param level If analyse.by="LE_GEAR" is selected, there is an option to +#' analyse at vessel level too, but taking the gear parameter estimates as +#' starting values. level = "vessel" turns this option on. +#' @return In general, a 5-peak analysis results in h=no fishing / in harbour, +#' f=fishing, s=steaming. A 3-peak analyses results in f=fishing (closest to +#' zero speed) and s=steaming. +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{activityTacsatAnalyse}}, +#' \code{\link{segmentedTacsatSpeed}} +#' @examples +#' +#' data(tacsat) +#' data(eflalo) +#' +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)] +#' +#' tacsatp$LE_GEAR <- ac(tacsatp$LE_GEAR) +#' tacsatp$LE_GEAR[which(is.na(tacsatp$LE_GEAR)==TRUE)] <- "NO_GEAR" +#' tacsat <- tacsatp +#' +#' #--------- LE_GEAR ----------- +#' #- Visual analyses of activity, and define number of peaks / kernels that can be +#' # observed (either 3 or 5) +#' \dontrun{ +#' storeScheme <- activityTacsatAnalyse(subset(tacsat,LE_GEAR == "OTM" & +#' format(SI_DATIM,"%Y") == 1801),units="year",analyse.by="LE_GEAR",identify="means") +#' res <- activityTacsat(subset(tacsat,LE_GEAR == "OTM" & +#' format(SI_DATIM,"%Y") == 1801),units="year",analyse.by="LE_GEAR", +#' storeScheme,plot=TRUE) +#' res <- activityTacsat(subset(tacsat,LE_GEAR == "OTM" & +#' format(SI_DATIM,"%Y") == 1801),units="year",analyse.by="LE_GEAR", +#' storeScheme,plot=TRUE,level="vessel") +#' +#' #--------- VE_REF ----------- +#' tacsat <- subset(tacsat,VE_REF == "801") +#' storeScheme <- activityTacsatAnalyse(tacsat,units="year",analyse.by="VE_REF", +#' identify="means") +#' +#' #- Run activityTacsat +#' res <- activityTacsat(tacsat,units="year",analyse.by="VE_REF",storeScheme, +#' plot=TRUE,level="all") +#' } +#' +#' @export activityTacsat +activityTacsat <- function(tacsat,units="year",analyse.by="LE_GEAR",storeScheme=NULL,plot=FALSE,level="all"){ + + require("mixtools") + if (!"SI_DATIM" %in% colnames(tacsat)) + tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, + sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") + if(!analyse.by %in% c("LE_GEAR","VE_REF")) stop("Analysing only by gear or vessel") + + #- Make subset for only those tacsat records that have speed + tacsat$ID <- 1:nrow(tacsat) + tacsat$SI_STATE <- NA + tacsatOrig <- tacsat + idx <- 
which(is.na(tacsat$SI_SP)==FALSE) + tacsat <- tacsat[idx,] + + #- If sigma is NULL it needs to be estimated and gets a variable name + storeScheme$sigma0[which(storeScheme$sigma0==0)] <- "d" + + if(units == "all"){ yrs <- 0; mths <- 0; wks <- 0} + if(units == "year"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 0; wks <- 0} + if(units == "month"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- an(sort(unique(format(tacsat$SI_DATIM,"%m")))); wks <- 0} + if(units == "week"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); wks <- an(sort(unique(format(tacsat$SI_DATIM,"%W"))))+1; mths <- 0} + + runScheme <- expand.grid(years=yrs,months=mths,weeks=wks) + + #----------------------------------------------------------------------------- + # Start run for all combinations of gear / vessel and units + #----------------------------------------------------------------------------- + + for(iRun in 1:nrow(runScheme)){ + yr <- runScheme[iRun,"years"] + mth <- runScheme[iRun,"months"] + wk <- runScheme[iRun,"weeks"] + if(nrow(runScheme)==1){ sTacsat <- tacsat + } else { + if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr) + if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & (an(format( tacsat$SI_DATIM,"%W"))+1) == wk) + if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & format(tacsat$SI_DATIM,"%m") == mth) + } + if(plot==TRUE) x11(); + #--------------------------------------------------------------------------- + #- Analyses when gear info is supplied LE_GEAR + #--------------------------------------------------------------------------- + if("LE_GEAR" %in% colnames(tacsat) & analyse.by == "LE_GEAR"){ + + gearList <- names(which((rowSums(table(sTacsat$LE_GEAR,sTacsat$SI_SP)) - table(sTacsat$LE_GEAR,sTacsat$SI_SP)[,"0"])>40)) + + #- Mirror the tacsat dataset and make a selection + tyg <- subset(sTacsat,is.na(LE_GEAR) == FALSE & LE_GEAR %in% gearList); 
tygmr <- tyg; tygmr$SI_SP <- -1* tygmr$SI_SP; tygmr <- rbind(tyg,tygmr) + tng <- subset(sTacsat,is.na(LE_GEAR) == TRUE | !LE_GEAR %in% gearList); tngmr <- tng; tngmr$SI_SP <- -1* tngmr$SI_SP; tngmr <- rbind(tng,tngmr) + + #------------------------------------------------------------------------- + #- Get general speed pattern by gear, use analysed number of kernals LE_GEAR + GENERIC + #------------------------------------------------------------------------- + res <- list() + for(iGr in unique(tyg$LE_GEAR)){ + + #- Get rid of very influential datapoints (lower their abundance) + tbl <- table(subset(tygmr,LE_GEAR==iGr)$SI_SP); + spd <- an(names(rev(sort(tbl))[1])) + idx <- which(subset(tygmr,LE_GEAR==iGr)$SI_SP==spd) + nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) + if(tbl[ac(spd)]/tbl[nxt] > 5){ + idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) + if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tygmr,LE_GEAR==iGr)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) + } else { idx <- -1:-nrow(subset(tygmr,LE_GEAR==iGr))} + + #----------------------------------------------------------------------------- + # Fit the 3 or 5 normal distributions. 
If parameter guestimates are + # available, then use these + #----------------------------------------------------------------------------- + if(is.null(storeScheme)==TRUE){ + res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=5,maxrestarts=20,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",0.911,"a","b"),sigma=rep(1,5))) + } else { + #- Fitting model when mean values of peaks has been defined + if("means" %in% colnames(storeScheme)){ + + #- Extract parameters from storeScheme + ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"means"] + sigma <- anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"sigma0"]) + fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"fixPeaks"]) + + #- Setup parameter estimate vectors for mu and sigma + if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintmn <- c("-a",0,"a") } else { constraintmn <- c("-b","-a",0,"a","b")} + if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintsd <- c("a","b","a")} else { constraintsd <- c("b","a",sigma,"a","b")} + if(fixPeaks) constraintmn <- c(na.omit(anf(unlist(strsplit(ss," "))))) + + #- Fit the actual model through the normalmixEM function + res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,mu=c(na.omit(as.numeric(strsplit(ss," ")[[1]]))), sigma=rep(1,length(constraintsd)), + maxrestarts=20,mean.constr=constraintmn,sd.constr=constraintsd)) + } else { + #- Fitting model when number of peaks has been defined + + #- Extract parameters from storeScheme + ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"peaks"] + sigma <- 
anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"sigma0"]) + fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"fixPeaks"]) + + #- Setup parameter estimate vectors for mu and sigma + if(ss==3){ constraintmn <- c("-a",0,"a") } else { constraintmn <- c("-b","-a",0,"a","b")} + if(ss==3){ constraintsd <- c("a","b","a")} else { constraintsd <- c("b","a",sigma,"a","b")} + if(length(ss)>0){ + + #- Fit the actual model through the normalmixEM function + if(is.na(ss)==TRUE) res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=5, maxrestarts=20, mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5))) + if(is.na(ss)==FALSE) res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=ss,maxrestarts=20, mean.constr=constraintmn, sd.constr=constraintsd, sigma=rep(1,length(constraintsd)))) + } else { res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=5, maxrestarts=20, mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5)))} + } + } + if(plot==TRUE) plot(res[[iGr]],2,breaks=100,xlim=c(-20,20)) + } + if(level == "vessel"){ + #- Transform the output into the right format + for(iGr in unique(tyg$LE_GEAR)) + if(!class(res[[iGr]]) == "try-error"){ res[[iGr]] <- res[[iGr]]$mu } else { res[[iGr]] <- rep(NA,5)} + res <- lapply(res,function(x){if(class(x)=="try-error"){x<-rep(NA,5)}else{x}}) + res <- lapply(res,sort) + } + + if(level == "vessel"){ + #------------------------------------------------------------------------- + #- Perform analyses per vessel with gear LE_GEAR + VE_REF + #------------------------------------------------------------------------- + + if(nrow(tygmr)>40) + shipList <- names(which((rowSums(table(tygmr$VE_REF,tygmr$SI_SP)) - 
table(tygmr$VE_REF,tygmr$SI_SP)[,"0"])>20)) + shipFit <- list() + if(exists("shipList")){ + for(iShip in shipList){ + + #- Get rid of very influential datapoints (lower their abundance) + tbl <- table(subset(tygmr,VE_REF==iShip)$SI_SP); + spd <- an(names(rev(sort(tbl))[1])) + idx <- which(subset(tygmr,VE_REF==iShip)$SI_SP==spd) + nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) + if(tbl[ac(spd)]/tbl[nxt] > 5){ + idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) + if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tygmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) + } else { idx <- -1:-nrow(subset(tygmr,VE_REF==iShip))} + + shipTacsat <- subset(tygmr,VE_REF == iShip) + + #----------------------------------------------------------------------------- + # Fit the 3 or 5 normal distributions. If parameter guestimates are + # available, then use these + #----------------------------------------------------------------------------- + + #- Setup parameter estimate vectors for mu and sigma + if(length(res[[names(which.max(table(shipTacsat$LE_GEAR)))]])==3){ constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} + if(length(res[[names(which.max(table(shipTacsat$LE_GEAR)))]])==3){ constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",0.911,"a","b")} + + #- Fit the actual model through the normalmixEM function + shipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],mu=res[[names(which.max(table(shipTacsat$LE_GEAR)))]],maxit=2000, + sigma=rep(1,length(constraintsd)),mean.constr=constraintmn,sd.constr=constraintsd)) + + if(class(shipFit[[iShip]])!= "try-error"){ + + #- Analyse the fit and turn it into a result of fishing - no fishing + #mu <- sort.int(shipFit[[iShip]]$mu,index.return=TRUE) + #sds <- shipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x + mu <- 
shipFit[[iShip]]$mu + sds <- shipFit[[iShip]]$sigma + + probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) + for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) + SI_STATE <- apply(probs,1,which.max) + + if(length(mu)==3){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} + if(length(mu)==5){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} + tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] + } else { tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- NA} + } + } + } else { + for(iGr in unique(tyg$LE_GEAR)){ + if(!class(res[[iGr]]) == "try-error"){ + + #- Analyse the fit and turn it into a result of fishing - no fishing + #mu <- sort.int(res[[iGr]]$mu,index.return=TRUE) + #sds <- res[[iGr]]$sigma[mu$ix]; mu <- mu$x + mu <- res[[iGr]]$mu + sds <- res[[iGr]]$sigma + probs <- dnorm(x=subset(tyg,LE_GEAR==iGr)$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) + for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=subset(tyg,LE_GEAR==iGr)$SI_SP,mean=mu[i],sd=sds[i])) + SI_STATE <- apply(probs,1,which.max) + + if(length(mu)==3){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} + if(length(mu)==5){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} + tacsat$SI_STATE[which(tacsat$ID %in% subset(tyg,LE_GEAR == iGr)$ID)] <- SI_STATE + } + } + } + #------------------------------------------------------------------------- + #- Perform analyses per vessel without gear NO_GEAR + VE_REF + #------------------------------------------------------------------------- + if(nrow(tngmr)>40) + nonshipList <- names(which((rowSums(table(tngmr$VE_REF,tngmr$SI_SP)) - table(tngmr$VE_REF,tngmr$SI_SP)[,"0"])>20)) + nonshipFit <- list() + 
if(exists("nonshipList")){ + for(iShip in nonshipList){ + + #- Get rid of very influential datapoints (lower their abundance) + tbl <- table(subset(tngmr,VE_REF==iShip)$SI_SP); + spd <- an(names(rev(sort(tbl))[1])) + idx <- which(subset(tngmr,VE_REF==iShip)$SI_SP==spd) + nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) + if(tbl[ac(spd)]/tbl[nxt] > 5){ + idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) + if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tngmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) + } else { idx <- -1:-nrow(subset(tngmr,VE_REF==iShip))} + + #----------------------------------------------------------------------------- + # Fit the 3 or 5 normal distributions. If parameter guestimates are + # available, then use these + #----------------------------------------------------------------------------- + + shipTacsat <- subset(tngmr,VE_REF == iShip) + if(exists("shipFit")){ + if(iShip %in% names(shipFit)){ + + #- Setup parameter estimate vectors for mu and sigma + if(length(shipFit[[iShip]]$mu)==3){constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} + if(length(shipFit[[iShip]]$mu)==3){constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",0.911,"a","b")} + + #- Fit the actual model through the normalmixEM function + nonshipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],k=length(shipFit[[iShip]]$mu),maxit=2000, + sigma=rep(1,length(constraintsd)),mean.constr=constraintmn,sd.constr=constraintsd)) + } else { + #- Fit the actual model through the normalmixEM function + nonshipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],k=5,maxit=2000,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",0.911,"a","b"),sigma=rep(1,5)))} + + if(!class(nonshipFit[[iShip]]) == "try-error"){ + + #- Analyse the fit and turn it 
into a result of fishing - no fishing + #mu <- sort.int(nonshipFit[[iShip]]$mu,index.return=TRUE) + #sds <- nonshipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x + mu <- nonshipFit[[iShip]]$mu + sds <- nonshipFit[[iShip]]$sds + + probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) + for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) + SI_STATE <- apply(probs,1,which.max) + + if(length(mu)==3){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} + if(length(mu)==5){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} + tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] + } + } + } + } + } + #--------------------------------------------------------------------------- + #- Analyses when vessel info is supplied VE_REF + ENOUGH # + #--------------------------------------------------------------------------- + if("VE_REF" %in% colnames(tacsat) & analyse.by == "VE_REF"){ + + vesselList <- names(which((rowSums(table(sTacsat$VE_REF,sTacsat$SI_SP)) - table(sTacsat$VE_REF,sTacsat$SI_SP)[,"0"])>40)) + + #- Mirror the tacsat dataset and make a selection + tyv <- subset(sTacsat,is.na(VE_REF) == FALSE & VE_REF %in% vesselList); tyvmr <- tyv; tyvmr$SI_SP <- -1* tyvmr$SI_SP; tyvmr <- rbind(tyv,tyvmr) + tnv <- subset(sTacsat,is.na(VE_REF) == TRUE | !VE_REF %in% vesselList); tnvmr <- tnv; tnvmr$SI_SP <- -1* tnvmr$SI_SP; tnvmr <- rbind(tnv,tnvmr) + + #- Perform analyses per vessel + if(nrow(tyv)>40) + shipList <- names(which((rowSums(table(tyvmr$VE_REF,tyvmr$SI_SP)) - table(tyvmr$VE_REF,tyvmr$SI_SP)[,"0"])>20)) + shipFit <- list() + if(exists("shipList")){ + for(iShip in shipList){ + + #- Get rid of very influential data points + tbl <- table(subset(tyvmr,VE_REF==iShip)$SI_SP); + spd <- an(names(rev(sort(tbl))[1])) + idx <- which(subset(tyvmr,VE_REF==iShip)$SI_SP==spd) + nxt <- 
ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) + if(tbl[ac(spd)]/tbl[nxt] > 5){ + idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) + if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tyvmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) + } else { idx <- -1:-nrow(subset(tyvmr,VE_REF==iShip))} + + shipTacsat <- subset(tyvmr,VE_REF == iShip) + + #----------------------------------------------------------------------------- + # Fit the 3 or 5 normal distributions. If parameter guestimates are + # available, then use these + #----------------------------------------------------------------------------- + if(is.null(storeScheme)==TRUE){ + shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=5,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",0.911,"a","b"),sigma=rep(1,5))) + } else { + #- Fitting model when mean values of peaks has been defined + if("means" %in% colnames(storeScheme)){ + + #- Extract parameters from storeScheme + ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"means"] + sigma <- anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"sigma0"]) + fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"fixPeaks"]) + + #- Setup parameter estimate vectors for mu and sigma + if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} + if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",sigma,"a","b")} + if(fixPeaks) 
constraintmn <- c(na.omit(anf(unlist(strsplit(ss," "))))) + + #- Fit the actual model through the normalmixEM function + shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],mu=c(na.omit(as.numeric(strsplit(ss," ")[[1]]))), maxit=2000, + mean.constr=constraintmn,sd.constr=constraintsd,sigma=rep(1,length(constraintsd)))) + } else { + #- Fitting model when number of peaks has been defined + + #- Extract parameters from storeScheme + ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"peaks"] + sigma <- anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"sigma0"]) + fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"fixPeaks"]) + + #- Setup parameter estimate vectors for mu and sigma + if(ss==3){ constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} + if(ss==3){ constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",sigma,"a","b")} + if(length(ss)>0){ + + #- Fit the actual model through the normalmixEM function + if(is.na(ss)==TRUE) shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=5,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5))) + if(is.na(ss)==FALSE) shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=ss,mean.constr=constraintmn,sd.constr=constraintsd,sigma=rep(1,length(constraintsd)))) + } else { shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=5,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5)))} + } + } + if(plot==TRUE) plot(shipFit[[iShip]],2,breaks=100,xlim=c(-20,20)) + if(!class(shipFit[[iShip]]) == "try-error"){ + + #- Analyse the fit and turn it 
into a result of fishing - no fishing + #mu <- sort.int(shipFit[[iShip]]$mu,index.return=TRUE) + #sds <- shipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x + mu <- shipFit[[iShip]]$mu + sds <- shipFit[[iShip]]$sigma + + probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) + for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) + SI_STATE <- apply(probs,1,which.max) + + if(length(mu)==3){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} + if(length(mu)==5){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} + tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] + } + } + } + #------------------------------------------------------------------------- + #- Perform analyses for all vessels left over VE_REF + NOT ENOUGH # + #------------------------------------------------------------------------- + if(nrow(tnvmr)>40) + nonshipList <- names(which((rowSums(table(tnvmr$VE_REF,tnvmr$SI_SP)) - table(tnvmr$VE_REF,tnvmr$SI_SP)[,"0"])>20)) + nonshipFit <- list() + if(exists("nonshipList")){ + for(iShip in nonshipList){ + + #- Get rid of very influential datapoints (lower their abundance) + tbl <- table(subset(tnvmr,VE_REF==iShip)$SI_SP); + spd <- an(names(rev(sort(tbl))[1])) + idx <- which(subset(tnvmr,VE_REF==iShip)$SI_SP==spd) + nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) + if(tbl[ac(spd)]/tbl[nxt] > 5){ + idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) + if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tnvmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) + } else { idx <- -1:-nrow(subset(tnvmr,VE_REF==iShip))} + + shipTacsat <- subset(tnvmr,VE_REF == iShip) + 
#----------------------------------------------------------------------------- + # Fit the 3 or 5 normal distributions. If parameter guestimates are + # available, then use these + #----------------------------------------------------------------------------- + if(length(shipFit[[iShip]]$mu)==3){constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} + if(length(shipFit[[iShip]]$mu)==3){constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",0.911,"a","b")} + + #- Fit the actual model through the normalmixEM function + nonshipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],k=length(shipFit[[iShip]]$mu),maxit=2000,mean.constr=constraintmn,sd.constr=constraintsd,sigma=rep(1,length(constraintsd)))) + + if(!class(nonshipFit[[iShip]]) == "try-error"){ + + #- Analyse the fit and turn it into a result of fishing - no fishing + #mu <- sort.int(nonshipFit[[iShip]]$mu,index.return=TRUE) + #sds <- nonshipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x + mu <- nonshipFit[[iShip]]$mu + sds <- nonshipFit[[iShip]]$sigma + + probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) + for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) + SI_STATE <- apply(probs,1,which.max) + + if(length(mu)==3) + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE) + if(length(mu)==5) + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE) + tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] + } + } + } + } + } + +leftOverTacsat <- tacsatOrig[which(!tacsatOrig$ID %in% tacsat$ID),] +tacsat <- rbind(tacsat,leftOverTacsat) +tacsat <- orderBy(~ID,tacsat) +cat("Note that in case of 5 peaks: no fishing = h, fishing = f, steaming / no fishing = s\n") +cat("Note that in case of 3 peaks: fishing = f, steaming / no fishing = s\n") +return(tacsat$SI_STATE)} diff --git 
a/vmstools/R/activityTacsatAnalyse.r b/vmstools/R/activityTacsatAnalyse.r index 59d3c6c..48215ff 100644 --- a/vmstools/R/activityTacsatAnalyse.r +++ b/vmstools/R/activityTacsatAnalyse.r @@ -1,52 +1,106 @@ -activityTacsatAnalyse <- function(tacsat,units="year",analyse.by="LE_GEAR",identify="peaks"){ - - if(!"LE_GEAR" %in% colnames(tacsat)) stop("Provide gear type (as column 'LE_GEAR' and if unknown, provide it as 'MIS'") - if(!analyse.by %in% c("LE_GEAR","VE_REF")) warning("Analysing by unknown column variable, please check!") - if(analyse.by %in% colnames(tacsat)){ - if(units == "all"){ yrs <- 0; mths <- 0; wks <- 0} - if(units == "year"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 0; wks <- 0} - if(units == "month"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- sort(unique(month(tacsat$SI_DATIM))); wks <- 0} - if(units == "week"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); wks <- sort(unique(week(tacsat$SI_DATIM))); mths <- 0} - - runScheme <- expand.grid(years=yrs,months=mths,weeks=wks,stringsAsFactors=FALSE) - storeScheme <- expand.grid(years=yrs,months=mths,weeks=wks,analyse.by=unique(tacsat[,analyse.by]),stringsAsFactors=FALSE) - storeScheme$peaks <- NA - storeScheme$fixPeaks <- FALSE - storeScheme$sigma0 <- 0.911 - if(identify == "means") storeScheme$means <- NA - for(iRun in 1:nrow(runScheme)){ - yr <- runScheme[iRun,"years"] - mth <- runScheme[iRun,"months"] - wk <- runScheme[iRun,"weeks"] - if(nrow(runScheme)==1){ sTacsat <- tacsat - } else { - if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr) - if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk) - if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth) - } - - for(iBy in na.omit(unique(sTacsat[,analyse.by]))){ - dat <- subset(sTacsat,sTacsat[,analyse.by] == iBy) - datmr <- dat; datmr$SI_SP <- -1*dat$SI_SP; datmr 
<- rbind(dat,datmr) - xrange <- pmin(20,range(datmr$SI_SP),na.rm=TRUE); datmr$SI_SP[which(abs(datmr$SI_SP) >20)] <- NA - hist(datmr$SI_SP,breaks=seq(-20,20,0.5),main=paste(iBy,ifelse(yr>0,yr,""),ifelse(mth>0,mth,""),ifelse(wk>0,wk,"")),xaxt="n") - axis(1,at=seq(-20,20,1)) - - require(tcltk) - - pks <- callNumberPeak() - storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by== iBy),"peaks"] <- pks - - if(identify=="means"){ - valPeaks <- callPeakValue(pks) - if(substr(valPeaks,1,1)==" ") - valPeaks <- substr(valPeaks,2,nchar(valPeaks)) - storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by== iBy),"means"] <- valPeaks - - } - } - } - } else { stop("analyse.by statement not found as a column in the specified tacsat dataset")} -return(storeScheme)} \ No newline at end of file +#' Analyse speed profiles to define number of peaks. +#' +#' Analyse histrogram of speed profiles by gear or vessel to define number of +#' peaks +#' +#' The analyses assumes that the speed profile close to 0 can be mirrored to +#' create a good normal distribution. The left-hand side of the histogram is +#' identical to the right-hand side and therefore potential identified peaks +#' should be mirrored as well. The detailed 'means' approach does take more +#' time to identify but is more succesful in fitting distributions to the speed +#' profile. It pays off to specify mean values of the peaks by 0.5knots +#' precision. +#' +#' Additional Parameters: Sigma0: Fixed sigma value for distribution around +#' zero. Zero (0) if parameter needs to be estimated. FixPeaks: Option to fix +#' the top of the density curves at the initial parameter values from +#' 'storeScheme'. +#' +#' @param tacsat tacsat dataset +#' @param units Analyse by: "year", "month" and "week". 
"month" and "week" +#' cannot be used at same time +#' @param analyse.by Analyse tacsat by gear ("LE_GEAR") or vessel ("VE_REF") +#' @param identify Identify either the number of peaks ("peaks", rough +#' approach) or mean speed associated with peaks ("means", detailed approach) +#' @return Returns a \code{\link{data.frame}} with a run scheme that can be +#' used as input to \code{\link{activityTacsat}} +#' @author Niels T. Hintzen +#' @seealso \code{\link{activityTacsat}}, \code{\link{segmentedTacsatSpeed}} +#' @examples +#' +#' data(tacsat) +#' data(eflalo) +#' +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)] +#' +#' tacsatp$LE_GEAR <- ac(tacsatp$LE_GEAR) +#' tacsatp$LE_GEAR[which(is.na(tacsatp$LE_GEAR)==TRUE)] <- "NO_GEAR" +#' tacsat <- tacsatp +#' +#' \dontrun{ +#' #--------- LE_GEAR ----------- +#' #- Visual analyses of activity, and define number of peaks / kernels that can +#' # be observed (either 3 or 5) +#' storeScheme <- activityTacsatAnalyse(subset(tacsat,LE_GEAR == "OTM" & +#' year(SI_DATIM) == 1801),units="year",analyse.by="LE_GEAR", +#' identify="peaks") +#' storeScheme <- activityTacsatAnalyse(subset(tacsat,LE_GEAR == "OTM" & +#' year(SI_DATIM) == 1801),units="year",analyse.by="LE_GEAR", +#' identify="means") +#' +#' } +#' +#' @export activityTacsatAnalyse +activityTacsatAnalyse <- function(tacsat,units="year",analyse.by="LE_GEAR",identify="peaks"){ + + if(!"LE_GEAR" %in% colnames(tacsat)) stop("Provide gear type (as column 'LE_GEAR' and if unknown, provide it as 'MIS'") + if(!analyse.by %in% c("LE_GEAR","VE_REF")) warning("Analysing by unknown column variable, please check!") + if(analyse.by %in% colnames(tacsat)){ + if(units == "all"){ yrs <- 0; mths <- 0; wks <- 0} + if(units == "year"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 0; wks <- 0} + if(units == "month"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 
sort(unique(month(tacsat$SI_DATIM))); wks <- 0} + if(units == "week"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); wks <- sort(unique(week(tacsat$SI_DATIM))); mths <- 0} + + runScheme <- expand.grid(years=yrs,months=mths,weeks=wks,stringsAsFactors=FALSE) + storeScheme <- expand.grid(years=yrs,months=mths,weeks=wks,analyse.by=unique(tacsat[,analyse.by]),stringsAsFactors=FALSE) + storeScheme$peaks <- NA + storeScheme$fixPeaks <- FALSE + storeScheme$sigma0 <- 0.911 + if(identify == "means") storeScheme$means <- NA + for(iRun in 1:nrow(runScheme)){ + yr <- runScheme[iRun,"years"] + mth <- runScheme[iRun,"months"] + wk <- runScheme[iRun,"weeks"] + if(nrow(runScheme)==1){ sTacsat <- tacsat + } else { + if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr) + if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk) + if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth) + } + + for(iBy in na.omit(unique(sTacsat[,analyse.by]))){ + dat <- subset(sTacsat,sTacsat[,analyse.by] == iBy) + datmr <- dat; datmr$SI_SP <- -1*dat$SI_SP; datmr <- rbind(dat,datmr) + xrange <- pmin(20,range(datmr$SI_SP),na.rm=TRUE); datmr$SI_SP[which(abs(datmr$SI_SP) >20)] <- NA + hist(datmr$SI_SP,breaks=seq(-20,20,0.5),main=paste(iBy,ifelse(yr>0,yr,""),ifelse(mth>0,mth,""),ifelse(wk>0,wk,"")),xaxt="n") + axis(1,at=seq(-20,20,1)) + + require(tcltk) + + pks <- callNumberPeak() + storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by== iBy),"peaks"] <- pks + + if(identify=="means"){ + valPeaks <- callPeakValue(pks) + if(substr(valPeaks,1,1)==" ") + valPeaks <- substr(valPeaks,2,nchar(valPeaks)) + storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by== iBy),"means"] <- valPeaks + + } + } + } + } else { stop("analyse.by 
statement not found as a column in the specified tacsat dataset")} +return(storeScheme)} diff --git a/vmstools/R/addWidth.r b/vmstools/R/addWidth.r index 1f3e85b..6d4f5e7 100644 --- a/vmstools/R/addWidth.r +++ b/vmstools/R/addWidth.r @@ -1,34 +1,71 @@ -addWidth <- function(interpolation,gearWidth){ - - -allPolygons <- list() -counter <- 0 -for(i in 1:length(interpolation)){ - - #Take the interpolated points - xs <- interpolation[[i]][-1,1] - ys <- interpolation[[i]][-1,2] - - if(all(is.na(xs))==F & all(is.na(ys))==FALSE){ - counter <- counter + 1 - #Calculate the bearing towards and away from each point - bear1 <- bearing(xs[1:(length(xs)-2)],ys[1:(length(xs)-2)],xs[2:(length(xs)-1)],ys[2:(length(xs)-1)]) - bear2 <- bearing(xs[2:(length(xs)-1)],ys[2:(length(xs)-1)],xs[3:length(xs)],ys[3:length(xs)]) - avbear<- atan2(mapply(sum,sin(bear1*(pi/180))+sin(bear2*(pi/180))),mapply(sum,cos(bear1*(pi/180))+cos(bear2*(pi/180))))*(180/pi) - - #Take the average of the two - avbear<- c(avbear[1],avbear,avbear[length(avbear)]) - - #Calculate the destinated point taking a begin point, a bearing and a certain distance to travel - outpointr <- destFromBearing(xs,ys,(avbear+90+360)%%360,gearWidth/2) - outpointl <- destFromBearing(xs,ys,(avbear-90+360)%%360,gearWidth/2) - - singlePolygons <- list() - for(j in 1:(nrow(outpointr)-1)){ - singlePolygons[[j]] <- Polygon(cbind(c(outpointr[j,1],outpointl[j,1],outpointl[j+1,1],outpointr[j+1,1],outpointr[j,1]), - c(outpointr[j,2],outpointl[j,2],outpointl[j+1,2],outpointr[j+1,2],outpointr[j,2]))) - } - allPolygons[[counter]] <- Polygons(singlePolygons,ID=ac(counter)) - } -} -return(SpatialPolygons(allPolygons))} +#' Add width of gear to interpolated gear track +#' +#' Creates a SpatialPolygon object containing interpolated tracks including the +#' width of a gear and can be plotted as such. 
+#' +#' +#' @param interpolation interpolated dataset as obtained from the function +#' 'interpolateTacsat' +#' @param gearWidth Width of the gear in km +#' @return Returns an object of class 'SpatialPolygon' from the 'sp' package. +#' Contains all polygons per interpolation, where each interpolation consists +#' of several polygons. +#' @author Niels T. Hintzen +#' @seealso \code{\link{interpolateTacsat}}, \code{\link{destFromBearing}} +#' @references EU Lot 2 project +#' @examples +#' +#' require(sp) +#' +#' data(tacsat) +#' +#' #Sort the VMS data +#' tacsat <- sortTacsat(tacsat) +#' tacsat <- tacsat[1:1000,] +#' +#' #Filter the Tacsat data +#' tacsat <- filterTacsat(tacsat,c(4,8),hd=NULL,remDup=TRUE) +#' +#' #Interpolate the VMS data +#' interpolation <- interpolateTacsat(tacsat,interval=120,margin=10,res=100, +#' method="cHs",params=list(fm=0.5,distscale=20,sigline=0.2, +#' st=c(2,6)),headingAdjustment=0) +#' +#' res <- addWidth(interpolation,0.024) +#' plot(res) +#' +#' @export addWidth +addWidth <- function(interpolation,gearWidth){ + + +allPolygons <- list() +counter <- 0 +for(i in 1:length(interpolation)){ + + #Take the interpolated points + xs <- interpolation[[i]][-1,1] + ys <- interpolation[[i]][-1,2] + + if(all(is.na(xs))==F & all(is.na(ys))==FALSE){ + counter <- counter + 1 + #Calculate the bearing towards and away from each point + bear1 <- bearing(xs[1:(length(xs)-2)],ys[1:(length(xs)-2)],xs[2:(length(xs)-1)],ys[2:(length(xs)-1)]) + bear2 <- bearing(xs[2:(length(xs)-1)],ys[2:(length(xs)-1)],xs[3:length(xs)],ys[3:length(xs)]) + avbear<- atan2(mapply(sum,sin(bear1*(pi/180))+sin(bear2*(pi/180))),mapply(sum,cos(bear1*(pi/180))+cos(bear2*(pi/180))))*(180/pi) + + #Take the average of the two + avbear<- c(avbear[1],avbear,avbear[length(avbear)]) + + #Calculate the destination point taking a begin point, a bearing and a certain distance to travel + outpointr <- destFromBearing(xs,ys,(avbear+90+360)%%360,gearWidth/2) + outpointl <- 
destFromBearing(xs,ys,(avbear-90+360)%%360,gearWidth/2) + + singlePolygons <- list() + for(j in 1:(nrow(outpointr)-1)){ + singlePolygons[[j]] <- Polygon(cbind(c(outpointr[j,1],outpointl[j,1],outpointl[j+1,1],outpointr[j+1,1],outpointr[j,1]), + c(outpointr[j,2],outpointl[j,2],outpointl[j+1,2],outpointr[j+1,2],outpointr[j,2]))) + } + allPolygons[[counter]] <- Polygons(singlePolygons,ID=ac(counter)) + } +} +return(SpatialPolygons(allPolygons))} diff --git a/vmstools/R/af.R b/vmstools/R/af.R index a8064e3..6e4b0f0 100644 --- a/vmstools/R/af.R +++ b/vmstools/R/af.R @@ -1,4 +1,20 @@ -`af` <- -function(x){return(as.factor(x))} - -#hello world +#' shortcut for as.factor +#' +#' Change the class of an object to factor +#' +#' +#' @param x object to turn into factor +#' @return as.factor attempts to coerce its argument to factor type +#' @author Niels T. Hintzen +#' @seealso \code{\link{as.factor}} +#' @references EU Lot 2 project +#' @examples +#' +#' as.factor(5) #returns the number 5 as class 'factor' +#' af(5) #returns the number 5 also as class 'factor' +#' +#' @export af +`af` <- +function(x){return(as.factor(x))} + +#hello world diff --git a/vmstools/R/an.R b/vmstools/R/an.R index cde2bd1..436659f 100644 --- a/vmstools/R/an.R +++ b/vmstools/R/an.R @@ -1,3 +1,19 @@ -`an` <- -function(x){return(as.numeric(x))} - +#' shortcut for as.numeric +#' +#' Change the class of an object to numeric +#' +#' +#' @param x object to turn into numeric +#' @return as.numeric attempts to coerce its argument to numeric type +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{as.numeric}} +#' @references EU Lot 2 project +#' @examples +#' +#' as.numeric("5") #returns the character 5 as class 'numeric' +#' an("5") #returns the character 5 also as class 'numeric' +#' +#' @export an +`an` <- +function(x){return(as.numeric(x))} + diff --git a/vmstools/R/anf.r b/vmstools/R/anf.r index f876362..dc82088 100644 --- a/vmstools/R/anf.r +++ b/vmstools/R/anf.r @@ -1,2 +1,20 @@ - 'anf' <- - function(x) as.numeric(as.character(x)) # alias to convert factors +#' shortcut for as.numeric(as.character()) +#' +#' Change the class of an object from factor to numeric +#' +#' +#' @param x object to turn from factor into numeric +#' @return as.numeric attempts to coerce its argument to numeric type +#' @author Francois Bastardie +#' @seealso \code{\link{as.numeric}}, \code{\link{as.character}} +#' @references EU Lot 2 project +#' @examples +#' +#' +#' res <- as.factor(5.1) +#' an(res) #returns 1 +#' anf(res) #returns the original 5.1 +#' +#' @export anf + 'anf' <- + function(x) as.numeric(as.character(x)) # alias to convert factors diff --git a/vmstools/R/bearing.R b/vmstools/R/bearing.R index 10f1c13..13b89ed 100644 --- a/vmstools/R/bearing.R +++ b/vmstools/R/bearing.R @@ -1,17 +1,44 @@ -bearing <- function(lon,lat,lonRef,latRef){ - - x1 <- lon - y1 <- lat - x2 <- lonRef - y2 <- latRef - - y <- sin((x2-x1)*pi/180) * cos(y2*pi/180) - x <- cos(y1*pi/180) * sin(y2*pi/180) - sin(y1*pi/180) * cos(y2*pi/180) * cos((x2-x1)*pi/180) - bearing <- atan2(y,x)*180/pi - bearing <- (bearing + 360)%%360 - return(bearing)} - - -#bearing(3.15,51.64,3.15,51.65) - -#hello world \ No newline at end of file +#' Compute bearing between two points on a sphere (approximation of the earth) +#' at the starting point +#' +#' Compute the bearing between two GPS locations defined in longitude and +#' latitude notation on the earth. The earth is assumed to have a perfect +#' spherical shape. Bearing is returned in compass degrees. 
+#' +#' +#' @param lon Longitude of point 2 +#' @param lat Latitude of point 2 +#' @param lonRef Longitude of point 1 +#' @param latRef Latitude of point 1 +#' @author Niels T. Hintzen +#' @seealso \code{\link{km2Degree}}, \code{\link{degree2Km}}, +#' \code{\link{lonLatRatio}},\code{\link{distance}} +#' @references EU Lot 2 project, based on the Haversine formula, see also: +#' Hintzen et al. 2010 Fisheries Research +#' @examples +#' +#' lon <- -4 +#' lat <- 50 +#' lonRef <- -4.2 +#' latRef <- 51 +#' +#' bearing(lon,lat,lonRef,latRef) #352.8271 +#' +#' @export bearing +bearing <- function(lon,lat,lonRef,latRef){ + + x1 <- lon + y1 <- lat + x2 <- lonRef + y2 <- latRef + + y <- sin((x2-x1)*pi/180) * cos(y2*pi/180) + x <- cos(y1*pi/180) * sin(y2*pi/180) - sin(y1*pi/180) * cos(y2*pi/180) * cos((x2-x1)*pi/180) + bearing <- atan2(y,x)*180/pi + bearing <- (bearing + 360)%%360 + return(bearing)} + + +#bearing(3.15,51.64,3.15,51.65) + +#hello world diff --git a/vmstools/R/bindAllMergedTables.r b/vmstools/R/bindAllMergedTables.r index 79ac5b3..169bb55 100644 --- a/vmstools/R/bindAllMergedTables.r +++ b/vmstools/R/bindAllMergedTables.r @@ -1,69 +1,184 @@ - - -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - bindAllMergedTables <- - function (vessels=character(), a.year='2009',species.to.keep=character(), - folder = file.path(), all.in.one.table=FALSE){ - - - bindAll <- function(vessels, a.year, species.to.keep, what, folder){ - - all.merged <- NULL # init - count <- 0 - for(a.vesselid in as.character(vessels)){ - count <- count+1 - print(count/length(vessels)*100) - cat(paste("load 'merged' for",a.vesselid,"\n")) - er <- try(load(file = file.path(folder, paste('merged_',a.vesselid,'_',a.year,'.RData',sep=''))) - , silent=TRUE) # get the 'merged' table for this vessel - - - if(class(er)!="try-error") { - if(length(species.to.keep)!=0){ - get.sp <- function (nm) unlist(lapply(strsplit(nm, split="_"), function(x) x[3])) - nm <- 
#' Bind all individual merged tables into a big one
#'
#' Bind all individual merged tables into one table.
#'
#' Possibly, merge irrelevant species into an other category to reduce the size
#' of the output object. Possibly, keep only the weight, or the value per
#' species. Both by default, but possible out of memory crash.
#'
#' @param vessels Vessel ID
#' @param a.year Year of the data
#' @param species.to.keep Array of species to keep
#' @param folder Local folder path
#' @param all.in.one.table Logical: if all data should be merged into one table
#' @author Francois Bastardie
#' @examples
#'
#' \dontrun{
#' data(eflalo)
#' data(tacsat)
#' data(euharbours); euharbours <- harbours
#'
#' # format
#' eflalo <- formatEflalo(eflalo)
#' tacsat <- formatTacsat(tacsat)
#'
#' # order tacsat chronologically with library(doBy)
#' tacsat <- sortTacsat(tacsat)
#'
#' # test each ping if in harbour or not
#' tacsat$SI_HARB <- NA
#' euharbours$Description <- euharbours$harbour
#' tacsat$SI_HARB <- pointInHarbour(lon=anf(tacsat$SI_LONG),
#'                                  lat=anf(tacsat$SI_LATI),
#'                                  harbours=euharbours,
#'                                  rowSize=30, returnNames=TRUE)
#' inHarb <- tacsat$SI_HARB
#' inHarb <- replace(inHarb, !is.na(inHarb), 1)
#' inHarb <- replace(inHarb, is.na(inHarb), 0)
#' inHarb <- as.numeric(inHarb)
#'
#' # assign a trip identifier
#' tacsat$SI_FT <- 1 # init
#' idx <- which(inHarb==0)
#' tacsat[idx,"SI_FT"] <- cumsum(inHarb) [idx] # add a SI_FT index
#'
#' # keep 'out of harbour' points only
#' # (but keep the departure point and the arrival point lying in the harbour)
#' startTrip <- c(diff(tacsat[,"SI_FT"]), 0)
#' endTrip   <- c(0, diff(tacsat[,"SI_FT"]))
#' tacsat[which(startTrip>0),"SI_FT"] <- tacsat[which(startTrip>0)+1,"SI_FT"]
#' tacsat[which(endTrip<0),"SI_FT"]  <- tacsat[which(endTrip<0)-1,"SI_FT"]
#' tacsat <- tacsat[which(inHarb==0 | startTrip>0 | endTrip<0),]
#'
#' # assign a state to each ping (here, useless if detectFishing at TRUE)
#' tacsat$SI_STATE <- 2 # init (1: fishing; 2: steaming)
#' # fake speed rule for fishing state
#' tacsat$SI_STATE[(tacsat$SI_SP>4 & tacsat$SI_SP<8)] <- 1
#'
#' # reduce the size of the eflalo data by merging species
#' # (assuming that the other species is coded MZZ), threshold in euros.
#' eflalo2 <- poolEflaloSpecies(eflalo, threshold=1e6, code="MZZ")
#'
#' # debug if eflalo has not been cleaned earlier
#' eflalo <- eflalo[!eflalo$VE_REF=="NA" & !is.na(eflalo$VE_REF),]
#'
#' # an informed VE_FLT is also required
#' if(all(is.na(eflalo$VE_FLT))) eflalo$VE_FLT <- "fleet1"
#'
#' # possible mis-naming mistakes
#' if(!match('LE_MET_level6',colnames(eflalo))>0){
#'   eflalo$LE_MET_level6 <- eflalo$LE_MET
#' }
#'
#' # debug
#' eflalo <- eflalo[eflalo$LE_MET!="No_logbook6",]
#'
#' # TEST FOR A GIVEN SET OF VESSELS
#' # (if detect.fishing is true then do also detection of fishing activity
#' # e.g. if speed='segment' the segmentTacsatSpeed() automatic detection of
#' # fishing states that will overwrite the existing SI_STATE)
#' mergeEflalo2Pings(eflalo=eflalo, tacsat=tacsat, vessels=c("738", "804"),
#'                   general=list(output.path=file.path("C:","output"),
#'                                visual.check=TRUE, detectFishing=TRUE,
#'                                speed="segment", what.speed="calculated"))
#' # ...OR APPLY FOR ALL VESSELS IN eflalo
#' mergeEflalo2Pings(eflalo=eflalo, tacsat=tacsat,
#'                   general=list(output.path=file.path("C:","output"),
#'                                visual.check=TRUE, detectFishing=TRUE,
#'                                speed="segment", what.speed="calculated"))
#' gc(reset=TRUE)
#'
#' # load the merged output table for one vessel
#' load(file.path("C:","output","merged_804_1800.RData"))
#'
#' # check the conservation of landings
#' sum(tapply(anf(merged$LE_KG_PLE), merged$flag, sum, na.rm=TRUE))
#' sum(eflalo[eflalo$VE_REF=="804","LE_KG_PLE"], na.rm=TRUE)
#'
#' # ...or bind all vessels (keeping only some given species here)
#' bindAllMergedTables(vessels=c("738", "804"), a.year = "1800",
#'                     species.to.keep=c("PLE","COD"),
#'                     folder = file.path("C:","output"),
#'                     all.in.one.table=TRUE)
#'
#' # ...and load the merged output table for all vessels
#' load(file.path("C:","output","all_merged__1800.RData"))
#'
#' }
#'
#' @export bindAllMergedTables
bindAllMergedTables <- function(vessels = character(), a.year = '2009',
                                species.to.keep = character(),
                                folder = file.path(), all.in.one.table = FALSE){

  #- Inner worker: load the per-vessel 'merged' tables found in 'folder',
  #  keep the essential columns ('what' selects weight and/or value columns),
  #  stack all vessels and save the result back into 'folder'.
  bindAll <- function(vessels, a.year, species.to.keep, what, folder){

    all.merged <- NULL   # accumulator for the stacked per-vessel tables
    count <- 0
    for(a.vesselid in as.character(vessels)){
      count <- count + 1
      print(count / length(vessels) * 100)   # progress in percent
      cat(paste("load 'merged' for", a.vesselid, "\n"))
      #- Loads the object 'merged' for this vessel into this frame
      er <- try(load(file = file.path(folder,
                       paste('merged_', a.vesselid, '_', a.year, '.RData', sep = ''))),
                silent = TRUE)

      #- BUGFIX-STYLE: use inherits() instead of class(er)!="try-error";
      #  class() may return a vector of length > 1.
      if(!inherits(er, "try-error")) {
        if(length(species.to.keep) != 0){
          #- Species code is the third token of e.g. "LE_KG_COD"
          get.sp <- function(nm) unlist(lapply(strsplit(nm, split = "_"), function(x) x[3]))
          nm <- names(merged)
          idx.col.w <- grep('KG', nm) # index columns weight
          all.sp <- get.sp(nm[idx.col.w])
          species.to.merge <- all.sp[!all.sp %in% species.to.keep]
          #- Merge all non-kept species into the 'other' category
          #  (assumes LE_EURO_MZZ / LE_KG_MZZ columns exist -- TODO confirm
          #  with poolEflaloSpecies() output)
          merged$LE_EURO_MZZ <- replace(merged$LE_EURO_MZZ, is.na(merged$LE_EURO_MZZ), 0)
          merged$LE_EURO_MZZ <- merged$LE_EURO_MZZ +
            apply(merged[, paste('LE_EURO_', species.to.merge, sep = '')], 1, sum, na.rm = TRUE)
          merged <- merged[, !colnames(merged) %in% paste('LE_EURO_', species.to.merge, sep = '')]
          merged$LE_KG_MZZ <- replace(merged$LE_KG_MZZ, is.na(merged$LE_KG_MZZ), 0)
          merged$LE_KG_MZZ <- merged$LE_KG_MZZ +
            apply(merged[, paste('LE_KG_', species.to.merge, sep = '')], 1, sum, na.rm = TRUE)
          merged <- merged[, !colnames(merged) %in% paste('LE_KG_', species.to.merge, sep = '')]
        }
        nm <- names(merged)
        idx.col.w <- grep('KG', nm)    # index columns with species weight
        idx.col.v <- grep('EURO', nm)  # index columns with species value
        if(length(what) == 0){
          idx.col <- c(idx.col.w, idx.col.v) # get all (but possible out of memory crash)
        } else {
          if(what == 'weight') idx.col <- idx.col.w
          if(what == 'value')  idx.col <- idx.col.v
        }
        # KEEP THE ESSENTIAL
        merged <- merged[, c("VE_REF", "FT_REF", "VE_FLT", "LE_MET_level6", "LE_GEAR",
                             "SI_LATI", "SI_LONG", "SI_SP", "SI_HE", "SI_STATE",
                             "SI_DATE", "SI_TIME", "SI_HARB",
                             nm[idx.col], 'LE_EFF_VMS', 'KW_HOURS',
                             "flag")]
        all.merged <- rbind.data.frame(all.merged, merged)
      } else cat(paste("failure for", a.vesselid, "\n"))
      print(nrow(all.merged))
    }
    #- Save; with what=character() the paste() collapses to 'all_merged__<year>'
    save("all.merged", file = file.path(folder,
           paste('all_merged_', what, '_', a.year, '.RData', sep = '')))

    return()
  }

  # calls
  bindAll(vessels, a.year, species.to.keep, what = 'weight', folder)
  bindAll(vessels, a.year, species.to.keep, what = 'value', folder)
  if(all.in.one.table) bindAll(vessels, a.year, species.to.keep, what = character(), folder)

  return()
}
+#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export building_tab_pca +building_tab_pca=function(data,especes){ + p=ncol(data) + noms=colnames(data) + ind_princ=which(is.element(noms,especes)) + princ=data[,ind_princ] + return(princ) +} diff --git a/vmstools/R/c.listquote.r b/vmstools/R/c.listquote.r index 7f879c7..c506b63 100644 --- a/vmstools/R/c.listquote.r +++ b/vmstools/R/c.listquote.r @@ -1,23 +1,42 @@ -c.listquote <- function( ... 
#' Produce quoted list for use within data.table
#'
#' Produce the quoted list format from a character vector which is used as
#' input for the data.table format.
#'
#' The character vector could contain e.g. colnames(x) or c("COL1","COL2","COL3")
#' where COL1 etc are column names.
#'
#' @param \dots character vector
#' @return Returns a quoted list, e.g. quote(list(COL1, COL2, COL3))
#' @note With great thanks to original author.
#' @author Niels T. Hintzen
#' @seealso See \code{\link{data.table}} documentation
#' @references See \code{\link{data.table}} documentation
#' @examples
#'
#' c.listquote(c("COL1","COL2","COL3")) #Returns: list(COL1,COL2,COL3)
#'
#' @export c.listquote
c.listquote <- function( ... ) {

  args <- as.list( match.call()[ -1 ] )
  lstquote <- list( as.symbol( "list" ) );
  for ( i in args ) {
    # Evaluate expression in parent environment to see what it refers to.
    # BUGFIX: use is.name()/is.call()/is.character() instead of comparing
    # class(i) with "==": class() may return a vector of length > 1, and
    # `||` on a length > 1 condition is an error as of R 4.3.
    if ( is.name( i ) || ( is.call( i ) && i[[1]] != "list" ) ) {
      i <- eval( substitute( i ), sys.frame( sys.parent() ) )
    }
    if ( is.call( i ) && i[[1]] == "list" ) {
      # Already a list(...) call: splice its arguments in directly
      lstquote <- c( lstquote, as.list( i )[ -1 ] )
    }
    else if ( is.character( i ) )
    {
      # Parse each string into an unevaluated symbol/expression
      for ( chr in i ) {
        lstquote <- c( lstquote, list( parse( text=chr )[[1]] ) )
      }
    }
    else
      stop( paste( "[", deparse( substitute( i ) ), "] Unknown class [", class( i ), "] or is not a list()", sep="" ) )
  }
  return( as.call( lstquote ) )
}
#' Computing the Minimum Convex Polygon (MCP)
#'
#' This function computes the Minimum Convex Polygon (MCP) from a set of
#' points. The MCP is the minimum area polygon containing a set of point
#' locations.
#'
#' This function is most powerful when used repetitively within a loop to
#' compute the MCP for subsets of points stored in a large data table.
#'
#' @param id Provide a unique integer to identify an MCP from others that you
#' may construct with other data points
#' @param points Two-column matrix or data frame of point coordinates
#' @param filename A character name for an ASCII output file
#' @param verbose Boolean: set to TRUE if extended processing feedback is
#' wanted
#' @param pct Integer 0 <= pct <= 100, the percentage of the MCP for which area
#' is provided
#' @return A list: \item{MCP.area}{The area of the MCP in square kilometers}
#' \item{MCP.pct}{The desired percentage of the MCP for which the area is
#' computed} \item{MCP.coords}{A matrix containing MCP vertices. Each row
#' represents a unique point, the first column contains x-coordinates, and the
#' second, y-coordinates}
#' @note Results are stored in the r.MCP object (required for graphical
#' visualization using plot_mcp). This function can be used on its own (once)
#' or repetitively in a loop to process grouped point data stored in a larger
#' table. When used repetitively, be sure to increment the id parameter to
#' ensure that each MCP has a unique identifier. The output ASCII coordinate
#' file can be further processed using the makeshapes function to generate an
#' ESRI Shapefile for MCP polygons.
#' @author Randy Bui, Ron N. Buliung, Tarmo K. Remmel
#' @references Builds upon MCP functions available in the adehabitat package
#' @examples
#'
#' data(tacsat)
#' calc_mcp(id=1, points = tacsat[1:10,c("SI_LONG","SI_LATI")], filename="MCP_Output.txt",
#'          verbose = FALSE, pct = 100)
#'
#' @export calc_mcp
calc_mcp <- function (id = 1, points = NULL, filename = "MCP_Output.txt",
    verbose = FALSE, pct = 100)
{
    #- Fail fast if the supporting package is missing: require() on its own
    #  only warns and returns FALSE, leading to an obscure error at mcp().
    if (!require(adehabitat, quietly = TRUE))
        stop("calc_mcp() needs the 'adehabitat' package (function mcp)")

    #- Home-range area helper, taken from the gpclib package: computes the
    #  MCP area per id for each requested percentage level.
    mcp.area <- function(xy, id, percent = seq(20, 100, by = 5),
        unin = c("m","km"), unout = c("ha", "km2", "m2"), plotit = TRUE){
        unin <- match.arg(unin)
        unout <- match.arg(unout)
        if (length(id) != nrow(xy))
            stop("xy and id should be of the same length")
        #- BUGFIX: subset id with the SAME mask as xy. The original filtered
        #  xy first and then applied !is.na() to the already-filtered xy,
        #  which left id unfiltered (misaligned) whenever NAs were present.
        ok <- !is.na(xy[, 1]) & !is.na(xy[, 2])
        xy <- xy[ok, ]
        id <- id[ok]
        lev <- percent
        ar <- matrix(0, nrow = length(lev), ncol = nlevels(factor(id)))
        lixy <- split(xy, id)
        le <- names(lixy)
        for (i in 1:length(lev)) {
            ar[i, ] <- unlist(lapply(lixy, function(z) {
                res <- mcp(z, rep(1, nrow(z)), percent = lev[i])
                class(res) <- "data.frame"
                return(area.poly(as(res[, 2:3], "gpc.poly")))
            }))
        }
        ar <- as.data.frame(ar)
        names(ar) <- le
        #- Convert from the input unit to the requested output unit
        if (unin == "m") {
            if (unout == "ha")
                ar <- ar/10000
            if (unout == "km2")
                ar <- ar/1e+06
        }
        if (unin == "km") {
            if (unout == "ha")
                ar <- ar * 100
            if (unout == "m2")
                ar <- ar * 1e+06
        }
        row.names(ar) <- lev
        class(ar) <- c("hrsize", "data.frame")
        attr(ar, "units") <- unout
        if (plotit)
            plot(ar)
        return(ar)
    }


    #- Input validation: mimic the original error-code protocol (return the
    #  string "ERROR" and optionally print the code) rather than stop().
    errorcode <- 1000
    if ((pct > 100) || (pct < 0)) {
        errorcode <- 100
        if (verbose) {
            cat("\n\nWARNING: Supplied percentage must be between 0 and 100 (inclusive).")
            cat("\nERROR CODE: ", errorcode, "\n\n", sep = "")
        }
        return("ERROR")
    }
    if (length(dim(points)) != 2) {
        errorcode <- 71
        if (verbose) {
            cat("\n\nWARNING: Provided points input matrix has fewer than 2 columns.")
            cat("\nERROR CODE: ", errorcode, "\n\n", sep = "")
        }
        return("ERROR")
    }
    if (dim(points)[2] != 2) {
        errorcode <- 70
        if (verbose) {
            cat("\n\nWARNING: Provided points input matrix has too many columns, only 2 are allowed.")
            cat("\nERROR CODE: ", errorcode, "\n\n", sep = "")
        }
        return("ERROR")
    }
    else {
        #- All points form one group (factor of 1's); compute polygon + area
        temp <- as.data.frame(cbind(1, points))
        temp[, 1] <- as.factor(temp[, 1])
        MCP <- (mcp(temp[, 2:3], temp[, 1], percent = pct))
        area <- mcp.area(temp[, 2:3], temp[, 1], percent = pct,
            unin = "m", unout = "km2", plotit = FALSE)
    }
    #- Append the polygon vertices to the ASCII output file
    coordsMCP <- cbind(id, MCP)
    tmp <- coordsMCP[, 2:4]
    outtabMCP <- cbind(id, tmp)
    write.table(outtabMCP, sep = ",", append = TRUE, file = filename,
        col.names = FALSE)
    #- Deliberate side effect: store results globally for plot_mcp (see @note)
    r.MCP <- list(MCP = MCP, points = points, id = id, MCP.area = area,
        MCP.pct = pct)
    assign("r.MCP", r.MCP, pos = 1)
    mcp.result <- list(id = id, MCP.area = area, MCP.pct = pct,
        MCP.coords = MCP)
    return(mcp.result)
}
#' Calculate the CI between two succeeding VMS datapoints
#'
#' The interpolated tracks can be surrounded by a sort of confidence interval
#' representing the outer region a vessel could have travelled between two
#' succeeding datapoints. Within this function the CI's are computed.
#'
#'
#' @param int interpolation, as data.frame from 'interpolateTacsat'
#' @param tacint tacsat records (two rows) corresponding with the interpolation
#' @param params list of parameters used to perform interpolation
#' @param grid object of class 'GridTopology' specifying the grid dimensions
#' @param spatialGrid large spatial grid ('SpatialGrid' /
#' 'SpatialGridDataFrame') the CI subgrid is cut from; must cover the CI region
#' @param plot Logical. Whether the result of the interpolation must be plotted
#' @return Returns the Confidence Interval on a grid of class
#' 'SpatialGridDataFrame' with the CI values in the data slot.
#' @note Computation can take a very long time if either grid resolution is
#' high or if many interpolations are used.
#' @author Niels T. Hintzen
#' @seealso \code{\link{interpolateTacsat}},\code{\link{maxRangeCI}}
#' @references Hintzen et al. 2010 Improved estimation of trawling tracks using
#' cubic Hermite spline interpolation of position registration data, EU lot 2
#' project
#' @examples
#'
#' data(tacsat)
#'
#' #Sort the Tacsat data
#' tacsat <- sortTacsat(tacsat)
#' tacsat <- tacsat[1:1000,]
#'
#' #Filter the Tacsat data
#' tacsat <- filterTacsat(tacsat,c(2,6),hd=NULL,remDup=TRUE)
#'
#' #Interpolate the VMS data
#' interpolation <- interpolateTacsat(tacsat,interval=120,margin=10,
#'                    res=100,method="cHs",params=list(fm=0.5,distscale=20,
#'                    sigline=0.2,st=c(2,6)),headingAdjustment=0)
#'
#' #Create the final grid where all interpolations should fit on
#' xrange <- c(2,3); yrange <- c(51,52)
#' grid <- createGrid(xrange,yrange,resx=0.01,resy=0.005)
#'
#' res <- calculateCI(interpolation[[4]],
#'                    tacsat[interpolation[[4]][1,],],
#'                    params=list(fm=0.25,distscale=3.1,sigline=0.4,st=c(2,6)),
#'                    grid=grid,
#'                    plot=TRUE)
#'
#' @export calculateCI
calculateCI <- function( int
                        ,tacint
                        ,params
                        ,grid
                        ,spatialGrid
                        ,plot=FALSE){

  #- Derive the date-time column if absent.
  #  BUGFIX: the original assigned 'tacint$SI_DATIMIM' (typo), which left
  #  SI_DATIM undefined although it is read just below.
  if (!"SI_DATIM" %in% colnames(tacint))
    tacint$SI_DATIM <- as.POSIXct(paste(tacint$SI_DATE, tacint$SI_TIME,
        sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M")
  #- Maximum range the vessel could have covered between the two pings, from
  #  elapsed time and the larger of observed vs. implied interpolation speed
  mxr <- maxRangeCI(x =c(int[-1,1][1],rev(int[-1,1])[1]),
                    y =c(int[-1,2][1],rev(int[-1,2])[1]),
                    Time=an(difftime(tacint$SI_DATIM[2],tacint$SI_DATIM[1],units="mins")),
                    speed=pmax(tacint$SI_SP,rep(distanceInterpolation(list(int)) / 1.852 /
                          an(difftime(tacint$SI_DATIM[2],tacint$SI_DATIM[1],units="hours")),2)))

  if(plot){
    par(mfrow=c(2,2))
    plot(mxr[[1]][,1],mxr[[1]][,2],type="l",xlab="Longitude",ylab="Latitude",asp=1/lonLatRatio(mxr[[1]][1,1],mxr[[1]][1,2]),main=paste(int[1,]))
    lines(int[-1,1],int[-1,2],col=2)
  }

  #- Bounding box of the max range, stretched outwards by 10% of the
  #  interpolation extent wherever the interpolation sticks out of it
  xrange <- range(mxr[[1]][,1]); yrange <- range(mxr[[1]][,2])
  xrg <- range(int[-1,1]); yrg <- range(int[-1,2])
  if(xrange[1] > xrg[1]) xrange[1] <- xrg[1] - diff(xrg)*0.1
  if(xrange[2] < xrg[2]) xrange[2] <- xrg[2] + diff(xrg)*0.1
  if(yrange[1] > yrg[1]) yrange[1] <- yrg[1] - diff(yrg)*0.1
  #- BUGFIX: stretch the upper y-bound upwards ('+'), mirroring the x-case;
  #  the original used '-' and shrank the box instead of expanding it
  if(yrange[2] < yrg[2]) yrange[2] <- yrg[2] + diff(yrg)*0.1

  #- Snap the ranges outwards onto the grid definition
  newxrange <- c((xrange[1] - grid@cellcentre.offset[1]) %/%grid@cellsize[1] * grid@cellsize[1] + grid@cellcentre.offset[1],
                 (xrange[2] - grid@cellcentre.offset[1] + grid@cellsize[1])%/%grid@cellsize[1] * grid@cellsize[1] + grid@cellcentre.offset[1])
  newyrange <- c((yrange[1] - grid@cellcentre.offset[2]) %/%grid@cellsize[2] * grid@cellsize[2] + grid@cellcentre.offset[2],
                 (yrange[2] - grid@cellcentre.offset[2] + grid@cellsize[2])%/%grid@cellsize[2] * grid@cellsize[2] + grid@cellcentre.offset[2])

  origxs <- seq(grid@cellcentre.offset[1],grid@cellcentre.offset[1] + grid@cellsize[1] * (grid@cells.dim[1]-1),by= grid@cellsize[1])
  origys <- seq(grid@cellcentre.offset[2],grid@cellcentre.offset[2] + grid@cellsize[2] * (grid@cells.dim[2]-1),by= grid@cellsize[2])

  #- The CI box must intersect the supplied large grid
  if((min(newxrange) > bbox(spatialGrid)[1,"max"]) | (max(newxrange) < bbox(spatialGrid)[1,"min"])) stop("CI outside grid")
  if((min(newyrange) > bbox(spatialGrid)[2,"max"]) | (max(newyrange) < bbox(spatialGrid)[2,"min"])) stop("CI outside grid")

  #- Indices of the grid cells closest to the snapped ranges
  subxs <- apply(abs(outer(origxs,newxrange,"-")),2,which.min)
  subys <- apply(abs(outer(origys,newyrange,"-")),2,which.min)

  #- Cut the CI subgrid out of the large grid (row order of a SpatialGrid
  #  runs top-down, hence the cells.dim[2] - ... + 1 reversal)
  grid <- spatialGrid[sort(grid@cells.dim[2]-seq(subys[1],subys[2])+1),seq(subxs[1],subxs[2])]
  grid <- as(grid,"SpatialGridDataFrame")
  coords <- coordinates(grid)

  # Distance to begin or end point
  bpDistan <- distance(tacint$SI_LONG[1],tacint$SI_LATI[1],coords[,1],coords[,2])
  epDistan <- distance(tacint$SI_LONG[2],tacint$SI_LATI[2],coords[,1],coords[,2])
  pdistan <- pmin(bpDistan,epDistan)

  if(plot){ image(t(matrix(pdistan,ncol=grid@grid@cells.dim[1],nrow=grid@grid@cells.dim[2],byrow=TRUE)[grid@grid@cells.dim[2]:1,]),col=rev(heat.colors(12))); box()}

  # Distance of each grid cell to the interpolated track
  intDistan <- do.call(pmin,lapply(as.list(2:nrow(int)),function(x){distance(int[x,1],int[x,2],coords[,1],coords[,2])}))

  if(plot){ image(t(matrix(intDistan,ncol=grid@grid@cells.dim[1],nrow=grid@grid@cells.dim[2],byrow=TRUE)[grid@grid@cells.dim[2]:1,]),col=rev(heat.colors(12))); box()}

  #- CI value per cell from the two distance kernels
  CI <- N1p0(intDistan*params$distscale,0,pdistan^params$sigline,0)
  if(max(CI,na.rm=TRUE) < 0.1) warning("Prediction max(CI) is very small")

  if(plot){ image(t(matrix(CI,ncol=grid@grid@cells.dim[1],nrow=grid@grid@cells.dim[2],byrow=TRUE)[grid@grid@cells.dim[2]:1,]),col=rev(heat.colors(12))); box()}
  grid@data <- data.frame(data=rep(0,nrow(coords)))
  grid@data$data <- CI
  return(grid)
}
interval between succeeding points - tacsatp <- intervalTacsat(tacsat,level=level,weight=weight,fill.na=fill.na) - - if(!"SI_DATIM" %in% colnames(tacsatp)) tacsatp$SI_DATIM <- as.POSIXct(paste(tacsatp$SI_DATE, tacsatp$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - if(level=="trip"){ - if(is.null(tacsatp$FT_REF)==TRUE) stop("no tripnumber available to merge on trip level") - sptacsat <- split(tacsatp,tacsatp$VE_REF) - tacsatp$SI_SPCA <- unlist(lapply(sptacsat,function(x){ - - FT_REF <- as.factor(x$FT_REF); - res <- by(x,FT_REF, - function(y){ - if(nrow(y)>1){ - interval <- y$INTV - distance_xmin1 <- c(NA,distance(y$SI_LONG[2:nrow(y)], y$SI_LATI[2:nrow(y)], - y$SI_LONG[1:(nrow(y)-1)],y$SI_LATI[1:(nrow(y)-1)])) - distance_xplus1 <- c(distance_xmin1[-1],NA) - if(any(weight == 0)){ - if(weight[1] == 0) SI_SPCA <- distance_xplus1 / (interval/60) - if(weight[2] == 0) SI_SPCA <- distance_xmin1 / (interval/60) - } else { - difftime_xmin1 <- intervalTacsat(y,level="trip",weight=c(1,0),fill.na=fill.na)$INTV - difftime_xplus1 <- intervalTacsat(y,level="trip",weight=c(0,1),fill.na=fill.na)$INTV - SI_SPCA <- 2*(2* ((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) * (distance_xplus1 * weight[2] / (difftime_xplus1/60)) / - (((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) + (distance_xplus1 * weight[2] / (difftime_xplus1/60)))) - } - #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different - # weight setting. - if(fill.na==TRUE){ - idx <- which(is.na(SI_SPCA)==TRUE) - if(weight[1] == 0 | weight[2] == 0){ - speeds <- cbind((distance_xmin1[idx] * weight[1]) / (interval[idx]/60), - (distance_xplus1[idx] * weight[2]) / (interval[idx]/60)) - } else { - speeds <- cbind((distance_xmin1[idx] * weight[1]) / (difftime_xmin1[idx] /60), - (distance_xplus1[idx] * weight[2]) / (difftime_xplus1[idx]/60)) - } - #- Use of 0.5 if ifelse = NA, as counter to 2*... 
- SI_SPCA[idx] <- apply(speeds,1,function(x){isfin <- is.finite(x); return(ifelse(all(isfin),mean(x),ifelse(any(isfin),x[isfin],NA)))}) - #SI_SPCA[idx] <- 2*(2* ifelse(is.na(speeds[,1])==TRUE,0.5,speeds[,1]) * ifelse(is.na(speeds[,2])==TRUE,0.5,speeds[,2]) / - # (ifelse(is.na(speeds[,1])==TRUE,0,speeds[,1]) + ifelse(is.na(speeds[,2])==TRUE,0,speeds[,2]))) - if(length(which(SI_SPCA[idx]==0))>0) SI_SPCA[idx][which(SI_SPCA[idx]==0)]<- NA - } - - return(SI_SPCA) - } else { - return(NA) - } - }) - return(unsplit(res,FT_REF))})) - tacsatp$SI_SPCA[which(tacsatp$FT_REF == "0")] <- NA - - } - if(level=="vessel"){ - #- Take the interval for weights c(0,1) and c(1,0) direct from tacsat - interval <- tacsatp$INTV - distance_xmin1 <- c(NA,distance(tacsatp$SI_LONG[2:nrow(tacsatp)], tacsatp$SI_LATI[2:nrow(tacsatp)], - tacsatp$SI_LONG[1:(nrow(tacsatp)-1)], tacsatp$SI_LATI[1:(nrow(tacsatp)-1)])) - distance_xplus1 <- c(distance_xmin1[-1],NA) - #- Recalculate the interval rate for either outer averages - difftime_xmin1 <- intervalTacsat(tacsatp,level="vessel",weight=c(1,0),fill.na=fill.na)$INTV - difftime_xplus1 <- intervalTacsat(tacsatp,level="vessel",weight=c(0,1),fill.na=fill.na)$INTV - if(any(weight == 0)){ - if(weight[1] == 0) SI_SPCA <- distance_xplus1 / (interval/60) - if(weight[2] == 0) SI_SPCA <- distance_xmin1 / (interval/60) - } else { - SI_SPCA <- 2*(2* ((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) * (distance_xplus1 * weight[2] / (difftime_xplus1/60)) / - (((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) + (distance_xplus1 * weight[2] / (difftime_xplus1/60)))) - } - #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different - # weight setting. 
- if(fill.na==TRUE){ - idx <- which(is.na(SI_SPCA)==TRUE) - if(weight[1] == 0 | weight[2] == 0){ - speeds <- cbind((distance_xmin1[idx] * weight[1]) / (interval[idx]/60), - (distance_xplus1[idx] * weight[2]) / (interval[idx]/60)) - } else { - speeds <- cbind((distance_xmin1[idx] * weight[1]) / (difftime_xmin1[idx] /60), - (distance_xplus1[idx] * weight[2]) / (difftime_xplus1[idx]/60)) - } - #- Use of 0.5 if ifelse = NA, as counter to 2*... - SI_SPCA[idx] <- apply(speeds,1,function(x){isfin <- is.finite(x); return(ifelse(all(isfin),mean(x),ifelse(any(isfin),x[isfin],NA)))}) - #SI_SPCA[idx] <- 2*(2* ifelse(is.na(speeds[,1])==TRUE,0.5,speeds[,1]) * ifelse(is.na(speeds[,2])==TRUE,0.5,speeds[,2]) / - # (ifelse(is.na(speeds[,1])==TRUE,0,speeds[,1]) + ifelse(is.na(speeds[,2])==TRUE,0,speeds[,2])))) - if(length(which(SI_SPCA[idx]==0))>0) SI_SPCA[idx][which(SI_SPCA[idx]==0)]<- NA - } - tacsatp$SI_SPCA <- SI_SPCA - - vessels <- unique(tacsatp$VE_REF) - first.vessels <- unlist(lapply(as.list(vessels),function(x){which(tacsatp$VE_REF==x)[1]})) - last.vessels <- unlist(lapply(as.list(vessels),function(x){rev(which(tacsatp$VE_REF==x))[1]})) - if(weight[1] != 0) tacsatp$SI_SPCA[first.vessels] <- NA - if(weight[2] != 0) tacsatp$SI_SPCA[last.vessels] <- NA - if(fill.na==TRUE) tacsatp$SI_SPCA[first.vessels] <- distance_xplus1[first.vessels] / (interval[first.vessels]/60) - if(fill.na==TRUE) tacsatp$SI_SPCA[last.vessels] <- distance_xmin1[last.vessels] / (interval[last.vessels]/60) - - } - idx <- which(colnames(tacsatp) %in% c("INTV")) - #- Convert from km/h to knots / nautical miles an hour - tacsatp$SI_SPCA <- tacsatp$SI_SPCA / 1.852 -return(if(length(idx)>0){tacsatp[,-idx]}else{tacsatp})} \ No newline at end of file +#' Calculate the speed of the vessel at each ping +#' +#' Calculate the speed of a vessel based on the time and distance travelled +#' between pings. +#' +#' Note that the DEFAULT speed given is the difference between ping x and ping +#' x-1. 
Hence, the first ping of a vessel or trip does NOT have a calculated +#' speed and will display NA. +#' +#' With weight you can specify if the speed is calculated between ping x and +#' ping x-1 (weight = c(1,0)), if the speed is calculated between ping x and +#' ping x+1 (weight = c(0,1)) or an intermediate weight (weight = c(0.4,0.6) / +#' equal weight = c(0.5,0.5)). +#' +#' @param tacsat tacsat dataset +#' @param level level to get calculated speed at: trip or vessel +#' @param weight weight to apply to calculation of mean speed towards and away +#' from ping +#' @param fill.na If speed cannot be calculated based on default or provided +#' weight, take closest alternative to provide a speed +#' @return The original tacsat file is returned including a column: SI_SPCA +#' which holds the calculated speed in knots +#' @author Niels T. Hintzen +#' @seealso \code{\link{interpolateTacsat}},\code{\link{intervalTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' result <- calculateSpeed(tacsat[1:100,],level="vessel") +#' result <- calculateSpeed(tacsat[1:100,],level="vessel",weight=c(2,1),fill.na=TRUE) +#' +#' data(eflalo) +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' result <- calculateSpeed(tacsatp[1:100,],level="trip",weight=c(1,1),fill.na=FALSE) +#' +#' +#' +#' @export calculateSpeed +calculateSpeed <- function(tacsat,level="vessel",weight=c(1,1),fill.na=FALSE){ + + if(length(weight) != 2) stop("weight must be specified as a length 2 numeric vector") + weight <- weight / sum(weight,na.rm=TRUE) + + #- Add interval between succeeding points + tacsatp <- intervalTacsat(tacsat,level=level,weight=weight,fill.na=fill.na) + + if(!"SI_DATIM" %in% colnames(tacsatp)) tacsatp$SI_DATIM <- as.POSIXct(paste(tacsatp$SI_DATE, tacsatp$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + if(level=="trip"){ + if(is.null(tacsatp$FT_REF)==TRUE) stop("no tripnumber available to merge on trip level") + sptacsat <- split(tacsatp,tacsatp$VE_REF) + 
tacsatp$SI_SPCA <- unlist(lapply(sptacsat,function(x){ + + FT_REF <- as.factor(x$FT_REF); + res <- by(x,FT_REF, + function(y){ + if(nrow(y)>1){ + interval <- y$INTV + distance_xmin1 <- c(NA,distance(y$SI_LONG[2:nrow(y)], y$SI_LATI[2:nrow(y)], + y$SI_LONG[1:(nrow(y)-1)],y$SI_LATI[1:(nrow(y)-1)])) + distance_xplus1 <- c(distance_xmin1[-1],NA) + if(any(weight == 0)){ + if(weight[1] == 0) SI_SPCA <- distance_xplus1 / (interval/60) + if(weight[2] == 0) SI_SPCA <- distance_xmin1 / (interval/60) + } else { + difftime_xmin1 <- intervalTacsat(y,level="trip",weight=c(1,0),fill.na=fill.na)$INTV + difftime_xplus1 <- intervalTacsat(y,level="trip",weight=c(0,1),fill.na=fill.na)$INTV + SI_SPCA <- 2*(2* ((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) * (distance_xplus1 * weight[2] / (difftime_xplus1/60)) / + (((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) + (distance_xplus1 * weight[2] / (difftime_xplus1/60)))) + } + #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different + # weight setting. + if(fill.na==TRUE){ + idx <- which(is.na(SI_SPCA)==TRUE) + if(weight[1] == 0 | weight[2] == 0){ + speeds <- cbind((distance_xmin1[idx] * weight[1]) / (interval[idx]/60), + (distance_xplus1[idx] * weight[2]) / (interval[idx]/60)) + } else { + speeds <- cbind((distance_xmin1[idx] * weight[1]) / (difftime_xmin1[idx] /60), + (distance_xplus1[idx] * weight[2]) / (difftime_xplus1[idx]/60)) + } + #- Use of 0.5 if ifelse = NA, as counter to 2*... 
+ SI_SPCA[idx] <- apply(speeds,1,function(x){isfin <- is.finite(x); return(ifelse(all(isfin),mean(x),ifelse(any(isfin),x[isfin],NA)))}) + #SI_SPCA[idx] <- 2*(2* ifelse(is.na(speeds[,1])==TRUE,0.5,speeds[,1]) * ifelse(is.na(speeds[,2])==TRUE,0.5,speeds[,2]) / + # (ifelse(is.na(speeds[,1])==TRUE,0,speeds[,1]) + ifelse(is.na(speeds[,2])==TRUE,0,speeds[,2]))) + if(length(which(SI_SPCA[idx]==0))>0) SI_SPCA[idx][which(SI_SPCA[idx]==0)]<- NA + } + + return(SI_SPCA) + } else { + return(NA) + } + }) + return(unsplit(res,FT_REF))})) + tacsatp$SI_SPCA[which(tacsatp$FT_REF == "0")] <- NA + + } + if(level=="vessel"){ + #- Take the interval for weights c(0,1) and c(1,0) direct from tacsat + interval <- tacsatp$INTV + distance_xmin1 <- c(NA,distance(tacsatp$SI_LONG[2:nrow(tacsatp)], tacsatp$SI_LATI[2:nrow(tacsatp)], + tacsatp$SI_LONG[1:(nrow(tacsatp)-1)], tacsatp$SI_LATI[1:(nrow(tacsatp)-1)])) + distance_xplus1 <- c(distance_xmin1[-1],NA) + #- Recalculate the interval rate for either outer averages + difftime_xmin1 <- intervalTacsat(tacsatp,level="vessel",weight=c(1,0),fill.na=fill.na)$INTV + difftime_xplus1 <- intervalTacsat(tacsatp,level="vessel",weight=c(0,1),fill.na=fill.na)$INTV + if(any(weight == 0)){ + if(weight[1] == 0) SI_SPCA <- distance_xplus1 / (interval/60) + if(weight[2] == 0) SI_SPCA <- distance_xmin1 / (interval/60) + } else { + SI_SPCA <- 2*(2* ((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) * (distance_xplus1 * weight[2] / (difftime_xplus1/60)) / + (((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) + (distance_xplus1 * weight[2] / (difftime_xplus1/60)))) + } + #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different + # weight setting. 
+ if(fill.na==TRUE){ + idx <- which(is.na(SI_SPCA)==TRUE) + if(weight[1] == 0 | weight[2] == 0){ + speeds <- cbind((distance_xmin1[idx] * weight[1]) / (interval[idx]/60), + (distance_xplus1[idx] * weight[2]) / (interval[idx]/60)) + } else { + speeds <- cbind((distance_xmin1[idx] * weight[1]) / (difftime_xmin1[idx] /60), + (distance_xplus1[idx] * weight[2]) / (difftime_xplus1[idx]/60)) + } + #- Use of 0.5 if ifelse = NA, as counter to 2*... + SI_SPCA[idx] <- apply(speeds,1,function(x){isfin <- is.finite(x); return(ifelse(all(isfin),mean(x),ifelse(any(isfin),x[isfin],NA)))}) + #SI_SPCA[idx] <- 2*(2* ifelse(is.na(speeds[,1])==TRUE,0.5,speeds[,1]) * ifelse(is.na(speeds[,2])==TRUE,0.5,speeds[,2]) / + # (ifelse(is.na(speeds[,1])==TRUE,0,speeds[,1]) + ifelse(is.na(speeds[,2])==TRUE,0,speeds[,2])))) + if(length(which(SI_SPCA[idx]==0))>0) SI_SPCA[idx][which(SI_SPCA[idx]==0)]<- NA + } + tacsatp$SI_SPCA <- SI_SPCA + + vessels <- unique(tacsatp$VE_REF) + first.vessels <- unlist(lapply(as.list(vessels),function(x){which(tacsatp$VE_REF==x)[1]})) + last.vessels <- unlist(lapply(as.list(vessels),function(x){rev(which(tacsatp$VE_REF==x))[1]})) + if(weight[1] != 0) tacsatp$SI_SPCA[first.vessels] <- NA + if(weight[2] != 0) tacsatp$SI_SPCA[last.vessels] <- NA + if(fill.na==TRUE) tacsatp$SI_SPCA[first.vessels] <- distance_xplus1[first.vessels] / (interval[first.vessels]/60) + if(fill.na==TRUE) tacsatp$SI_SPCA[last.vessels] <- distance_xmin1[last.vessels] / (interval[last.vessels]/60) + + } + idx <- which(colnames(tacsatp) %in% c("INTV")) + #- Convert from km/h to knots / nautical miles an hour + tacsatp$SI_SPCA <- tacsatp$SI_SPCA / 1.852 +return(if(length(idx)>0){tacsatp[,-idx]}else{tacsatp})} diff --git a/vmstools/R/clipObs2Tacsat.r b/vmstools/R/clipObs2Tacsat.r index d4f8704..1924bc6 100644 --- a/vmstools/R/clipObs2Tacsat.r +++ b/vmstools/R/clipObs2Tacsat.r @@ -1,196 +1,251 @@ -clipObs2Tacsat <- function(tacsat, #The tacsat dataset - obs, #The observation dataset - method="grid", 
#The method used, on 'grid' or 'euclidean' distance - control.grid=list(spatGrid=NULL,resx=NULL,resy=NULL,gridBbox="obs"), #gridBbox: whether bounding box should come from tacsat or observations - control.euclidean=list(threshold=NULL), #all.t = all.tacsat - temporalRange=NULL,#The range in which tacsat records may deviate from the observation time stamp - all.t=FALSE, - rowSize=1000 - ){ - -tacsat <- sortTacsat(tacsat) -obs <- sortTacsat(obs) - -#- If you want to get the full tacsat dataset back, make a copy of it first -if(all.t){ - tacsat$ID <- 1:nrow(tacsat) - tacsatOrig <- tacsat -} - -if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") -if(!"SI_DATIM" %in% colnames(obs)) obs$SI_DATIM <- as.POSIXct(paste(obs$SI_DATE, obs$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - -#- Subset tacsat that can never have match with obs because of temporal or space ranges -if(method == "euclidean" & is.null(control.euclidean$threshold)==FALSE){ - rix <- km2Degree(range(obs$SI_LONG,na.rm=TRUE)[1],range(obs$SI_LATI,na.rm=TRUE)[1],control.euclidean$threshold) - minXobs <- range(obs$SI_LONG,na.rm=TRUE)[1] - rix - raiy <- control.euclidean$threshold/111.1949 - minYobs <- range(obs$SI_LATI,na.rm=TRUE)[1] - raiy - rax <- km2Degree(range(obs$SI_LONG,na.rm=TRUE)[2],range(obs$SI_LATI,na.rm=TRUE)[2],control.euclidean$threshold) - maxXobs <- range(obs$SI_LONG,na.rm=TRUE)[2] + rax - maxYobs <- range(obs$SI_LATI,na.rm=TRUE)[2] + raiy - tacsat <- subset(tacsat,SI_LONG >= minXobs & SI_LONG <= maxXobs & SI_LATI >= minYobs & SI_LATI <= maxYobs) -} -if(method == "grid"){ - minXobs <- range(obs$SI_LONG,na.rm=TRUE)[1] - control.grid$resx - maxXobs <- range(obs$SI_LONG,na.rm=TRUE)[2] + control.grid$resx - minYobs <- range(obs$SI_LATI,na.rm=TRUE)[1] - control.grid$resy - maxYobs <- range(obs$SI_LATI,na.rm=TRUE)[2] + control.grid$resy - tacsat <- subset(tacsat,SI_LONG >= minXobs & SI_LONG <= 
maxXobs & SI_LATI >= minYobs & SI_LATI <= maxYobs) -} - -if(is.null(temporalRange)==FALSE){ - minTobs <- range(obs$SI_DATIM,na.rm=TRUE)[1] + temporalRange[1]*60 - maxTobs <- range(obs$SI_DATIM,na.rm=TRUE)[2] + temporalRange[2]*60 - tacsat <- subset(tacsat,SI_DATIM >= minTobs & SI_DATIM <= maxTobs) -} -if(nrow(tacsat)==0) stop("Number of tacsat records that are within reach of obs dataset is zero") - - -#- Gridcell wanted, but not given yet, so create one -if(method == "grid" & is.null(control.grid$spatGrid) == TRUE){ - if(is.null(control.grid$resx) == TRUE | is.null(control.grid$resy) == TRUE) stop("Method selected needs resx and resy statements") - - xrangeO <- range(obs$SI_LONG,na.rm=TRUE); xrangeT <- range(tacsat$SI_LONG,na.rm=TRUE) - yrangeO <- range(obs$SI_LATI,na.rm=TRUE); yrangeT <- range(tacsat$SI_LATI,na.rm=TRUE) - - if(control.grid$gridBbox == "obs") spatGrid <- createGrid(xrangeO,yrangeO,control.grid$resx,control.grid$resx,type="SpatialGrid") - if(control.grid$gridBbox == "tacsat") spatGrid <- createGrid(xrangeT,yrangeT,control.grid$resy,control.grid$resy,type="SpatialGrid") - control.grid$spatGrid <- spatGrid -} - -#- Perform calculations on gridcell -if(method == "grid" & is.null(control.grid$spatGrid) == FALSE){ - sPDFObs <- SpatialPointsDataFrame(data.frame(cbind(obs$SI_LONG,obs$SI_LATI)),data=obs) - sPDFTac <- SpatialPointsDataFrame(data.frame(cbind(tacsat$SI_LONG,tacsat$SI_LATI)),data=tacsat) - resObs <- over(sPDFObs,spatGrid) - resTac <- over(sPDFTac,spatGrid) - - idxObs <- SpatialPoints(spatGrid)@coords[resObs,] - idxTac <- SpatialPoints(spatGrid)@coords[resTac,] - - obs$GR_LONG <- idxObs[,1]; obs$GR_LATI <- idxObs[,2] - tacsat$GR_LONG <- idxTac[,1]; tacsat$GR_LATI <- idxTac[,2] - - tacsat$GR_ID<- resTac; - obs$GR_ID <- resObs; - - - if(is.null(temporalRange)==FALSE){ - - res <- do.call(c,lapply(as.list(1:nrow(obs)),function(x){ res <- which(resTac == resObs[x]); - restime <- difftime(tacsat$SI_DATIM[res],obs$SI_DATIM[x],units="mins"); - #retrn 
<- ifelse(restime <= temporalRange[2] & restime >=temporalRange[1],resObs[x],NA) - retrn <- which(restime <= temporalRange[2] & restime >= temporalRange[1]) - return(res[retrn])})) - - - retrn <- list(obs,tacsat[res,]) - } else { - retrn <- list(obs,tacsat) - } -} - -#- Perform calculation by Euclidian distance -if(method == "euclidean"){ - - obs$GR_ID <- 1:nrow(obs) - - #- Create storage of tacsat records to save - totRes <- cbind(numeric(), numeric()) - - obsLon <- obs$SI_LONG - obsLat <- obs$SI_LATI - tacLon <- tacsat$SI_LONG - tacLat <- tacsat$SI_LATI - - #- Chop it up into pieces to speed up the calculations - nChunkObs <- ceiling(nrow(obs) /rowSize) - nChunkTac <- ceiling(nrow(tacsat) /rowSize) - for(iNO in 1:nChunkObs){ - if(iNO == nChunkObs){ - ox <- obsLon[(iNO*rowSize - rowSize + 1):length(obsLon)] - oy <- obsLat[(iNO*rowSize - rowSize + 1):length(obsLat)] - } else { - ox <- obsLon[(iNO*rowSize - rowSize + 1):(iNO*rowSize)] - oy <- obsLat[(iNO*rowSize - rowSize + 1):(iNO*rowSize)] - } - for(iNT in 1:nChunkTac){ - if(iNT == nChunkTac){ - tx <- tacLon[(iNT*rowSize - rowSize + 1):length(tacLon)] - ty <- tacLat[(iNT*rowSize - rowSize + 1):length(tacLat)] - } else { - tx <- tacLon[(iNT*rowSize - rowSize + 1):(iNT*rowSize)] - ty <- tacLat[(iNT*rowSize - rowSize + 1):(iNT*rowSize)] - } - - minXobs <- range(ox,na.rm=TRUE)[1] - rix - minYobs <- range(oy,na.rm=TRUE)[1] - raiy - maxXobs <- range(ox,na.rm=TRUE)[2] + rax - maxYobs <- range(oy,na.rm=TRUE)[2] + raiy - cont <- ifelse(length(which(tx >= minXobs & tx <= maxXobs & ty >= minYobs & ty <= maxYobs))>0,TRUE,FALSE) - - if(cont){ - - #- Check if the length of both sets are equal or not - if(iNO == nChunkObs | iNT == nChunkTac){ - - #- Get both the minimum distance, but also the index of the tacsat record associated - res <- do.call(rbind, - lapply(as.list(1:length((iNO*rowSize - rowSize +1):ifelse(iNO == nChunkObs,length(obsLon),(iNO*rowSize)))), - function(x){ - #- Get the row numbers of the full observation and 
tacsat set used here - obsRows <- (iNO*rowSize - rowSize +1):ifelse(iNO == nChunkObs,length(obsLon),(iNO*rowSize)) - if(length(tx)= temporalRange[1]) - } else { retrn <- 1:length(restime) } - if(length(tacRows[idx[retrn]])>0){ toReturn <- cbind(tacRows[idx[retrn]],obs$GR_ID[obsRows[x]]) - } else { toReturn <- cbind(NA,NA) } - - return(toReturn)})) - - - - totRes <- rbind(totRes,na.omit(res)) - } - if(iNO != nChunkObs & iNT != nChunkTac){ - - obsRows <- ((iNO*rowSize - rowSize +1):(iNO*rowSize)) - tacRows <- (((iNT*rowSize - rowSize + 1):(iNT*rowSize))) - res <- outer(1:rowSize,1:rowSize,FUN= - function(x,y){ - distObsTac <- distance(ox[x],oy[x],tx[y],ty[y]) - return(distObsTac)}) - - idx <- apply(res,2,function(x){return(x <= control.euclidean$threshold)}) - idx <- which(idx == TRUE,arr.ind=TRUE) - restime <- difftime(tacsat$SI_DATIM[tacRows[idx[,2]]],obs$SI_DATIM[obsRows[idx[,1]]],units="mins") - - if(is.null(temporalRange)==FALSE){ retrn <- which(restime <= temporalRange[2] & restime >= temporalRange[1]) - } else { retrn <- 1:length(restime) } - if(length(retrn)>0){ - res <- cbind(tacRows[idx[retrn,2]],obs$GR_ID[obsRows[idx[retrn,1]]]) - totRes <- rbind(totRes,na.omit(res)) - } - } - } - - }#End iNT loop - }#End iNO loop - tacsat$GR_ID <- NA - dubTacsat <- tacsat[totRes[,1],] - dubTacsat$GR_ID <- totRes[,2] - - - retrn <- list(obs,dubTacsat) -}#End method euclidean - -if(all.t) retrn[[2]] <- merge(retrn[[2]],tacsatOrig,by=colnames(tacsatOrig),all=TRUE) - -return(retrn)} +#' Find tacsat records close to observation data records +#' +#' Find the tacsat records that are close in distance or on a defined grid to +#' the observation data positions in both time and space +#' +#' Returned is a list with the first element being the observation dataset with +#' an GR_ID identifier that matches the GR_ID from the matching tacsat records. +#' The second element is the tacsat records with the same GR_IDs. 
+#' +#' @param tacsat Dataframe of vessel VMS in tacsat format +#' @param obs Dataframe also in tacsat format, but with observation data (GPS +#' and Date+Time) +#' @param method 'grid' or 'euclidean'. Match tacsat records in same gridcell +#' (method = "grid") or only x km from observed datapoint (method = +#' "euclidean") +#' @param control.grid If method = "grid", control.grid is a list with +#' specification of the spatialGrid (spatGrid, of class "SpatialGrid") or the +#' resolution in x and y values where the outermargins can be given by the +#' tacsat dataset (gridBbox="tacsat") or the observation dataset +#' (gridBbox="obs"). +#' +#' Example: control.grid=list(spatGrid = NULL, resx = c(0.5,1), resy = NULL, +#' gridBbox = "obs") +#' @param control.euclidean If method = "euclidean" then a maximum distance the +#' tacsat position may deviate from the observed datapoint needs to be given +#' (in km). +#' @param temporalRange The range in which tacsat records may deviate from the +#' observation time stamp (in minutes) +#' @param all.t If you want to return all tacsat records, all.t=T (return = +#' list, first element = observation set, second = matching tacsat), else F +#' second = all tacsat +#' @param rowSize To speed up calculations, define maximum rowSize (default = +#' 1000) +#' @author Niels T. 
Hintzen +#' @references EU Lot 2 project +#' @examples +#' +#' data(tacsat) +#' tacsat <- sortTacsat(tacsat) +#' +#' obs <- tacsat[round(seq(1,nrow(tacsat),length.out=30)),] +#' obs <- obs[,c(1,2,3,4,5,6)] +#' obs$OB_TYP <- "Benthos" +#' colnames(obs) <- c("OB_COU","OB_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","OB_TYP") +#' newTime <- obs$SI_DATIM - runif(30,-60*20,60*20) +#' obs$SI_LATI <- jitter(obs$SI_LATI,factor=0.25) +#' obs$SI_LONG <- jitter(obs$SI_LONG,factor=0.5) +#' +#' +#' a <- clipObs2Tacsat(tacsat,obs,method="grid",control.grid=list(resx=0.1,resy=0.05, +#' gridBbox="obs"),temporalRange=c(-30,120),all.t=FALSE) +#' a <- clipObs2Tacsat(tacsat,obs,method="euclidean",control.euclidean=list(threshold=1), +#' temporalRange=c(-1e10,-1) ,all.t=FALSE) +#' +#' @export clipObs2Tacsat +clipObs2Tacsat <- function(tacsat, #The tacsat dataset + obs, #The observation dataset + method="grid", #The method used, on 'grid' or 'euclidean' distance + control.grid=list(spatGrid=NULL,resx=NULL,resy=NULL,gridBbox="obs"), #gridBbox: whether bounding box should come from tacsat or observations + control.euclidean=list(threshold=NULL), #all.t = all.tacsat + temporalRange=NULL,#The range in which tacsat records may deviate from the observation time stamp + all.t=FALSE, + rowSize=1000 + ){ + +tacsat <- sortTacsat(tacsat) +obs <- sortTacsat(obs) + +#- If you want to get the full tacsat dataset back, make a copy of it first +if(all.t){ + tacsat$ID <- 1:nrow(tacsat) + tacsatOrig <- tacsat +} + +if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") +if(!"SI_DATIM" %in% colnames(obs)) obs$SI_DATIM <- as.POSIXct(paste(obs$SI_DATE, obs$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + +#- Subset tacsat that can never have match with obs because of temporal or space ranges +if(method == "euclidean" & is.null(control.euclidean$threshold)==FALSE){ + rix <- 
km2Degree(range(obs$SI_LONG,na.rm=TRUE)[1],range(obs$SI_LATI,na.rm=TRUE)[1],control.euclidean$threshold) + minXobs <- range(obs$SI_LONG,na.rm=TRUE)[1] - rix + raiy <- control.euclidean$threshold/111.1949 + minYobs <- range(obs$SI_LATI,na.rm=TRUE)[1] - raiy + rax <- km2Degree(range(obs$SI_LONG,na.rm=TRUE)[2],range(obs$SI_LATI,na.rm=TRUE)[2],control.euclidean$threshold) + maxXobs <- range(obs$SI_LONG,na.rm=TRUE)[2] + rax + maxYobs <- range(obs$SI_LATI,na.rm=TRUE)[2] + raiy + tacsat <- subset(tacsat,SI_LONG >= minXobs & SI_LONG <= maxXobs & SI_LATI >= minYobs & SI_LATI <= maxYobs) +} +if(method == "grid"){ + minXobs <- range(obs$SI_LONG,na.rm=TRUE)[1] - control.grid$resx + maxXobs <- range(obs$SI_LONG,na.rm=TRUE)[2] + control.grid$resx + minYobs <- range(obs$SI_LATI,na.rm=TRUE)[1] - control.grid$resy + maxYobs <- range(obs$SI_LATI,na.rm=TRUE)[2] + control.grid$resy + tacsat <- subset(tacsat,SI_LONG >= minXobs & SI_LONG <= maxXobs & SI_LATI >= minYobs & SI_LATI <= maxYobs) +} + +if(is.null(temporalRange)==FALSE){ + minTobs <- range(obs$SI_DATIM,na.rm=TRUE)[1] + temporalRange[1]*60 + maxTobs <- range(obs$SI_DATIM,na.rm=TRUE)[2] + temporalRange[2]*60 + tacsat <- subset(tacsat,SI_DATIM >= minTobs & SI_DATIM <= maxTobs) +} +if(nrow(tacsat)==0) stop("Number of tacsat records that are within reach of obs dataset is zero") + + +#- Gridcell wanted, but not given yet, so create one +if(method == "grid" & is.null(control.grid$spatGrid) == TRUE){ + if(is.null(control.grid$resx) == TRUE | is.null(control.grid$resy) == TRUE) stop("Method selected needs resx and resy statements") + + xrangeO <- range(obs$SI_LONG,na.rm=TRUE); xrangeT <- range(tacsat$SI_LONG,na.rm=TRUE) + yrangeO <- range(obs$SI_LATI,na.rm=TRUE); yrangeT <- range(tacsat$SI_LATI,na.rm=TRUE) + + if(control.grid$gridBbox == "obs") spatGrid <- createGrid(xrangeO,yrangeO,control.grid$resx,control.grid$resx,type="SpatialGrid") + if(control.grid$gridBbox == "tacsat") spatGrid <- 
createGrid(xrangeT,yrangeT,control.grid$resy,control.grid$resy,type="SpatialGrid") + control.grid$spatGrid <- spatGrid +} + +#- Perform calculations on gridcell +if(method == "grid" & is.null(control.grid$spatGrid) == FALSE){ + sPDFObs <- SpatialPointsDataFrame(data.frame(cbind(obs$SI_LONG,obs$SI_LATI)),data=obs) + sPDFTac <- SpatialPointsDataFrame(data.frame(cbind(tacsat$SI_LONG,tacsat$SI_LATI)),data=tacsat) + resObs <- over(sPDFObs,spatGrid) + resTac <- over(sPDFTac,spatGrid) + + idxObs <- SpatialPoints(spatGrid)@coords[resObs,] + idxTac <- SpatialPoints(spatGrid)@coords[resTac,] + + obs$GR_LONG <- idxObs[,1]; obs$GR_LATI <- idxObs[,2] + tacsat$GR_LONG <- idxTac[,1]; tacsat$GR_LATI <- idxTac[,2] + + tacsat$GR_ID<- resTac; + obs$GR_ID <- resObs; + + + if(is.null(temporalRange)==FALSE){ + + res <- do.call(c,lapply(as.list(1:nrow(obs)),function(x){ res <- which(resTac == resObs[x]); + restime <- difftime(tacsat$SI_DATIM[res],obs$SI_DATIM[x],units="mins"); + #retrn <- ifelse(restime <= temporalRange[2] & restime >=temporalRange[1],resObs[x],NA) + retrn <- which(restime <= temporalRange[2] & restime >= temporalRange[1]) + return(res[retrn])})) + + + retrn <- list(obs,tacsat[res,]) + } else { + retrn <- list(obs,tacsat) + } +} + +#- Perform calculation by Euclidian distance +if(method == "euclidean"){ + + obs$GR_ID <- 1:nrow(obs) + + #- Create storage of tacsat records to save + totRes <- cbind(numeric(), numeric()) + + obsLon <- obs$SI_LONG + obsLat <- obs$SI_LATI + tacLon <- tacsat$SI_LONG + tacLat <- tacsat$SI_LATI + + #- Chop it up into pieces to speed up the calculations + nChunkObs <- ceiling(nrow(obs) /rowSize) + nChunkTac <- ceiling(nrow(tacsat) /rowSize) + for(iNO in 1:nChunkObs){ + if(iNO == nChunkObs){ + ox <- obsLon[(iNO*rowSize - rowSize + 1):length(obsLon)] + oy <- obsLat[(iNO*rowSize - rowSize + 1):length(obsLat)] + } else { + ox <- obsLon[(iNO*rowSize - rowSize + 1):(iNO*rowSize)] + oy <- obsLat[(iNO*rowSize - rowSize + 1):(iNO*rowSize)] + } + for(iNT 
in 1:nChunkTac){ + if(iNT == nChunkTac){ + tx <- tacLon[(iNT*rowSize - rowSize + 1):length(tacLon)] + ty <- tacLat[(iNT*rowSize - rowSize + 1):length(tacLat)] + } else { + tx <- tacLon[(iNT*rowSize - rowSize + 1):(iNT*rowSize)] + ty <- tacLat[(iNT*rowSize - rowSize + 1):(iNT*rowSize)] + } + + minXobs <- range(ox,na.rm=TRUE)[1] - rix + minYobs <- range(oy,na.rm=TRUE)[1] - raiy + maxXobs <- range(ox,na.rm=TRUE)[2] + rax + maxYobs <- range(oy,na.rm=TRUE)[2] + raiy + cont <- ifelse(length(which(tx >= minXobs & tx <= maxXobs & ty >= minYobs & ty <= maxYobs))>0,TRUE,FALSE) + + if(cont){ + + #- Check if the length of both sets are equal or not + if(iNO == nChunkObs | iNT == nChunkTac){ + + #- Get both the minimum distance, but also the index of the tacsat record associated + res <- do.call(rbind, + lapply(as.list(1:length((iNO*rowSize - rowSize +1):ifelse(iNO == nChunkObs,length(obsLon),(iNO*rowSize)))), + function(x){ + #- Get the row numbers of the full observation and tacsat set used here + obsRows <- (iNO*rowSize - rowSize +1):ifelse(iNO == nChunkObs,length(obsLon),(iNO*rowSize)) + if(length(tx)= temporalRange[1]) + } else { retrn <- 1:length(restime) } + if(length(tacRows[idx[retrn]])>0){ toReturn <- cbind(tacRows[idx[retrn]],obs$GR_ID[obsRows[x]]) + } else { toReturn <- cbind(NA,NA) } + + return(toReturn)})) + + + + totRes <- rbind(totRes,na.omit(res)) + } + if(iNO != nChunkObs & iNT != nChunkTac){ + + obsRows <- ((iNO*rowSize - rowSize +1):(iNO*rowSize)) + tacRows <- (((iNT*rowSize - rowSize + 1):(iNT*rowSize))) + res <- outer(1:rowSize,1:rowSize,FUN= + function(x,y){ + distObsTac <- distance(ox[x],oy[x],tx[y],ty[y]) + return(distObsTac)}) + + idx <- apply(res,2,function(x){return(x <= control.euclidean$threshold)}) + idx <- which(idx == TRUE,arr.ind=TRUE) + restime <- difftime(tacsat$SI_DATIM[tacRows[idx[,2]]],obs$SI_DATIM[obsRows[idx[,1]]],units="mins") + + if(is.null(temporalRange)==FALSE){ retrn <- which(restime <= temporalRange[2] & restime >= 
temporalRange[1]) + } else { retrn <- 1:length(restime) } + if(length(retrn)>0){ + res <- cbind(tacRows[idx[retrn,2]],obs$GR_ID[obsRows[idx[retrn,1]]]) + totRes <- rbind(totRes,na.omit(res)) + } + } + } + + }#End iNT loop + }#End iNO loop + tacsat$GR_ID <- NA + dubTacsat <- tacsat[totRes[,1],] + dubTacsat$GR_ID <- totRes[,2] + + + retrn <- list(obs,dubTacsat) +}#End method euclidean + +if(all.t) retrn[[2]] <- merge(retrn[[2]],tacsatOrig,by=colnames(tacsatOrig),all=TRUE) + +return(retrn)} diff --git a/vmstools/R/compareToOrdination.r b/vmstools/R/compareToOrdination.r index 1f56597..b61b87f 100644 --- a/vmstools/R/compareToOrdination.r +++ b/vmstools/R/compareToOrdination.r @@ -1,236 +1,367 @@ -########################################################################### -# DETERMINE M�TIERS LEVEL 7 AND 5 WITH TWO ORDINATION METHODS # -# (FIRST SPECIES IN CATCH AND FIRST GROUP OF SPECIES IN CATCH) # -# # -# DETERMINE M�TIERS LEVEL5 FROM M�TIERS LEVEL7 FOUND WITH THE # -# MULTIVARIATE CLASSIFICATION (DETERMINING THE TARGET SPECIES # -# FOR EACH CLUSTER) # -# # -# COMPARE THE M�TIERS FOUND WITH THE ORDINATION METHODS AND # -# WITH THE MULTIVARIATE CLASSIFICATION # -########################################################################### - -compareToOrdination=function(dat, Step2, clusters, targetSpecies){ - - # Load the table linking 3A-CODE (FAO CODE of species) to the species assemblage (level 5). - data(correspLevel7to5) - - # Load the table linking mixed metiers (composed by 2 simple metiers) to their official code of mixed metiers level 5 (FAO - 3 characters). 
- data(correspMixedMetier) - - - ########################## - # FIRST SPECIES IN CATCH # - ########################## - - dat=as.matrix(dat[,2:ncol(dat)]) - p=ncol(dat) - firstSp=list() - nbLogFirstSp=list() - - # Determine the first species for each logevent (the catchest species of the logevent) - firstSp=apply(dat,1,function(x) names(which.max(x))) - metiersFirstSpeciesL7=firstSp - - # Calculate the number of logevent by first species (number of logevents by cluster) - nbLogFirstSp=sapply(unique(unlist(firstSp)),function(x) table(firstSp==x)[2]) - - names(nbLogFirstSp)=substr(names(nbLogFirstSp),1,3) - - # Number of logevents linked to each species from the method "first species in catch" - png("Number of logevents by ordination metier level7.png", width = 1200, height = 800) - barplot(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)], main="Number of logevents by ordination metier level7", xlab="Main species", ylab="Number of Logevents", las=3) - dev.off() - # idem without the species with the biggest number of logevents - png(paste("Number of logevents by ordination metier level7 without ",names(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)])[1],".png",sep=""), width = 1200, height = 800) - barplot(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)][-1], main=paste("Number of logevents by ordination metier level7 without",names(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)])[1],sep=" "), xlab="Main species", ylab="Number of Logevents", las=3) - dev.off() - - - # Names of species from the ordination method - nomEspOrdi=names(nbLogFirstSp) - nbEspOrdi=length(nomEspOrdi) - nomEspOrdi=nomEspOrdi[order(nomEspOrdi,decreasing=FALSE)] - # Initialization - numEspOrdiClarai=numeric() - nomEspOrdiPlot=character() - nbLogEspOrdiPlot=numeric() - nbClust=length(unique(clusters)) - - # For each cluster Clara - for(i in 1:nbClust){ - # Names of species from ordination method linked to the logevents of cluster Clara i - 
nomEspOrdiClarai=unique(unlist(firstSp[which(clusters==i)])) - # their number - numEspOrdiClarai=sapply(nomEspOrdiClarai, function(x) which(nomEspOrdi==x)) - - # Names of species to plot in the barplot of cluster i - nomEspOrdiPloti=rep("",nbEspOrdi) - nomEspOrdiPloti[numEspOrdiClarai]=nomEspOrdiClarai - nomEspOrdiPlot=rbind(nomEspOrdiPlot,nomEspOrdiPloti) - - # for the cluster i, number of logevents of cluster i linked to each species from ordination method - nbLogEspOrdiPloti=rep(0,nbEspOrdi) - nbLogEspOrdiPloti[numEspOrdiClarai]=sapply(nomEspOrdiClarai,function(x) rev(table(firstSp[which(clusters==i)]==x))[1]) - nbLogEspOrdiPlot=rbind(nbLogEspOrdiPlot,nbLogEspOrdiPloti) - } - colnames(nbLogEspOrdiPlot)=names(nbLogFirstSp)[order(names(nbLogFirstSp))] - rownames(nbLogEspOrdiPlot)=paste("nbLogFirstSpCluster",seq=1:nbClust,sep="") - - # Graphics - png("Ordination_metiersL7_by_cluster.png", width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(nbLogEspOrdiPlot[i,],names.arg=nomEspOrdiPlot[i,], las=3) - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Number of logevents linked to each metier from the ordination method 'first species' by cluster","\n","\n",sep="")) - dev.off() - - # Projections on the first factorial plans - nomEspOrdi=substr(names(nbLogFirstSp),1,3) - espOrdi=matrix() - espOrdi=rbind(names(nbLogFirstSp),seq(1:length(nbLogFirstSp))) - numFirstSp=firstSp - for(i in 1:length(nbLogFirstSp)){ - numFirstSp[which(firstSp==nomEspOrdi[i])]=espOrdi[2,which(espOrdi[1,]==nomEspOrdi[i])] - } - - # Projections on the first factorial plans - png("Ordination_Projections.png", width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(Step2[,1], Step2[,2], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 
1-2", xlab="axis 1", ylab="axis 2") - plot(Step2[,2], Step2[,3], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") - plot(Step2[,1], Step2[,3], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") - plot(Step2[,1], Step2[,4], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") - plot(Step2[,2], Step2[,4], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") - plot(Step2[,3], Step2[,4], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") - par(op) - dev.off() - - - # Determine the metier level 5 from the metier level 7 based on the first species in catch (logevent by logevent) - print("Please, be patient...") - metiersFirstSpeciesL5=lapply(as.list(firstSp), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0){print(paste(x," : unknown species, classed in \'FIF\' group",sep=""));"FIF"} - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"DCF_species_level5_COD"]) - metiersFirstSpeciesL5=unlist(lapply(metiersFirstSpeciesL5, function(x) as.character(x))) - - - - - ################################### - # FIRST GROUP OF SPECIES IN CATCH # - ################################### - - # Determine the metier of each logevent thanks to the first group of species in catch (level 5) of the logevent - datL5=dat - groupColSpecies=lapply(as.list(colnames(datL5)), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0){print(paste(x," 
: unknown species, classed in \'FIF\' group",sep=""));"FIF"} - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"DCF_species_level5_COD"]) - groupColSpecies=unlist(lapply(groupColSpecies, function(x) as.character(x))) - colnames(datL5)=groupColSpecies - nbC5=length(unique(colnames(datL5))) - tab=matrix(NA,nrow=nrow(datL5),ncol=nbC5) - colnames(tab)=unique(colnames(datL5)) - for(i in 1:nbC5){ - # Sub-data containing only one metier level 5. - subdati=datL5[,which(colnames(datL5)==(unique(colnames(datL5))[i]))] - if(length(which(colnames(datL5)==(unique(colnames(datL5))[i])))==1) tab[,i]=subdati - else - tab[,i]=apply(subdati,1,sum) - } - # Metiers (level 5) of each logevent found thanks to the first group of species in catch (level 5) - metiersFirstGroupL5=unique(colnames(datL5))[apply(tab,1,which.max)] - - - - - - - ############################################################### - # LEVELS 7 AND 5 FOR METIERS FROM MULTIVARIATE CLASSIFICATION # - ############################################################### - - # List of metiers (level 5.7) - listMetiersLevel57=list() - for (i in 1:nbClust){ - metiersClusteri=lapply(targetSpecies[[i]], function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0){print(paste(x," : unknown species, classed in \'FIF\' group",sep=""));"FIF"} - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"DCF_species_level5_COD"]) - metiersClusteri=as.character(unique(unlist(metiersClusteri))) - metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") - listMetiersLevel57[[i]]=metiersClusteri - } - - # Metiers (level 5.7) of each logevent found thanks to the getMetierClusters method - #(where mixed metiers labels are the aggregation of several simple metiers labels). - metiersClustersL5=unlist(sapply(clusters,function(x) listMetiersLevel57[x])) - - - # Correspondence of mixed metiers (composed by 2 simple metiers) to their official code of mixed metiers level 5 (FAO - 3 characters). 
- listMixedMetiersLevel57=lapply(listMetiersLevel57, function(x) if(nchar(x)!=3 & is.element(x,correspMixedMetier[,1])){correspMixedMetier[which(correspMixedMetier[,"combMetiersL5"]==x),"mixedMetierL5"]} - else x ) - - # Metiers (level 5.7) of each logevent found thanks to the getMetierClusters method (with official labels for mixed metiers). - mixedMetiersClustersL5=unlist(sapply(clusters,function(x) listMixedMetiersLevel57[x])) - - - - - - ################# - # COMPARISONS # - ################# - - # Compare the metiers 'Clusters' (from getMetierClusters) (level 7) vs the metier 'First Species' in catch (level 7) - compClustersL7vsFirstSpeciesL7=table(clusters,metiersFirstSpeciesL7) - rownames(compClustersL7vsFirstSpeciesL7)=paste("Clust",seq=1:nbClust,sep="") - - # Compare the metiers 'Clusters' (level 5.7) vs the metiers 'First Species' (level 5) - clustStep3L5=clusters - compClustersL5vsFirstSpeciesL5=table(clustStep3L5,metiersFirstSpeciesL5) - rownames(compClustersL5vsFirstSpeciesL5)=unlist(listMixedMetiersLevel57) - - # Compare the metiers 'Clusters' (level 5.7) vs the metiers 'First Group' (level 5) - #compL5ClustStep3VsGroup=table(clustersStep3L5,metiersFirstGroupL5) !! pb regroupement des clusters qui correspondent au m�me groupe d'esp�ce niveau 5. 
- clustStep3L5=clusters - compClustersL5vsFirstGroupL5=table(clustStep3L5,metiersFirstGroupL5) - rownames(compClustersL5vsFirstGroupL5)=unlist(listMixedMetiersLevel57) - - # Compare the metiers 'First Species' (level 5) vs the metiers 'First Group' (level 5) - compFirstSpeciesL5vsFirstGroupL5=table(metiersFirstSpeciesL5,metiersFirstGroupL5) - - - # Less attractive - # Compare the metiers 'Clusters' (level 5.7) vs the metiers 'Clusters' (level 7) - clustStep3L7=clusters - compClustersL5vsClustersL7=table(mixedMetiersClustersL5,clustStep3L7) - colnames(compClustersL5vsClustersL7)=paste("Clust",seq=1:nbClust) - - - - # Create csv tables - dfTables=data.frame() - dfTables=c("compClustersL7vsFirstSpeciesL7","compClustersL5vsFirstSpeciesL5", - "compClustersL5vsFirstGroupL5","compFirstSpeciesL5vsFirstGroupL5","compClustersL5vsClustersL7") - write.table(dfTables[1],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(compClustersL7vsFirstSpeciesL7,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(dfTables[2],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(compClustersL5vsFirstSpeciesL5,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(dfTables[3],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(compClustersL5vsFirstGroupL5,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(dfTables[4],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(compFirstSpeciesL5vsFirstGroupL5,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(dfTables[5],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(compClustersL5vsClustersL7,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - - print("Done.") - - return(list(nbLogFirstSp=nbLogFirstSp, - compClustersL7vsFirstSpeciesL7=compClustersL7vsFirstSpeciesL7, - compClustersL5vsFirstSpeciesL5=compClustersL5vsFirstSpeciesL5, - 
compClustersL5vsFirstGroupL5=compClustersL5vsFirstGroupL5, - compFirstSpeciesL5vsFirstGroupL5=compFirstSpeciesL5vsFirstGroupL5, - compClustersL5vsClustersL7=compClustersL5vsClustersL7)) -} \ No newline at end of file +########################################################################### +# DETERMINE MÉTIERS LEVEL 7 AND 5 WITH TWO ORDINATION METHODS # +# (FIRST SPECIES IN CATCH AND FIRST GROUP OF SPECIES IN CATCH) # +# # +# DETERMINE MÉTIERS LEVEL5 FROM MÉTIERS LEVEL7 FOUND WITH THE # +# MULTIVARIATE CLASSIFICATION (DETERMINING THE TARGET SPECIES # +# FOR EACH CLUSTER) # +# # +# COMPARE THE MÉTIERS FOUND WITH THE ORDINATION METHODS AND # +# WITH THE MULTIVARIATE CLASSIFICATION # +########################################################################### + + + +#' Compare the metiers found with the classification from clustering e.g. CLARA +#' against the metiers found by two simple alternative ordination methods: +#' 'first species' & 'first group'. +#' +#' This function allows one to compare the metiers found using the classification +#' from clustering against the metiers found by two alternative ordination +#' methods, 'first species' and 'first group'. +#' +#' The 'first species' method consists of characterizing the level 7 of each +#' logevent by the species with the highest catch. Then, the level 5 is +#' deduced from the target assemblage of species to which this species +#' belongs. +#' +#' The 'first group' method alternatively assigns a level 5 to each logevent +#' corresponding to the target assemblage having the highest catch volume. +#' +#' The classification from clustering defines level 7 metiers for which each +#' metier is characterized by one or several species, called target species, +#' unlike the metiers from the simpler ordination methods. Hence, mixed metiers +#' can be obtained in case that these species belong to different target +#' assemblages. 
+#' +#' This function highlights the differences when defining the metiers between the +#' classification from clustering and the simpler ordination methods, and +#' potentially demonstrates the higher power of the data clustering method in +#' obtaining exhaustive and accurately defined metiers. +#' +#' +#' @param dat a reduced data.frame from an eflalo format. It should contain +#' only the LE_ID (Logevent ID) variable as well as all species names in +#' columns, with raw catch data. It may be needed to sort out potential +#' error-prone rows (such as rows with only 0) prior to the analysis, and to +#' also replace NA values by 0. +#' @param Step2 numerical matrix with logevents as rows, and values to be used +#' for calculating distances between individuals as columns. This matrix is +#' produced at the step 2 of the metier analysis, output of the function +#' getTableAfterPCA(). In case a PCA is run, the selected axes will appear as +#' columns. If no PCA is run, the matrix will be the same as datSpecies +#' (produced at the step 1 of the metier analysis, using the function +#' extractTableMainSpecies()), with percentage values by species. +#' @param clusters the vector storing the cluster label of each logbook event. +#' This vector will be produced at the step 3 of the metier analysis, using the +#' function getMetierClusters(). +#' @param targetSpecies a list giving the target species by cluster. This list +#' will be produced at the step 3 of the metier analysis, using the function +#' getMetierClusters(). +#' @return The function returns a list with a number of tables on the +#' comparison of the three methods: \item{nbLogFirstSp}{An integer vector +#' giving the number of logevents allocated to each metier (level 7) defined by +#' the ordination method 'first species'. 
} +#' \item{compClustersL7vsFirstSpeciesL7}{A table giving the distribution of the +#' logevents of each metier (level 7) defined by the multivariate +#' classification in the metiers (level 7) defined by the ordination method +#' 'first species'. } \item{compClustersL5vsFirstSpeciesL5}{A table giving the +#' distribution of the logevents of each metier (level 5) defined by the +#' multivariate classification in the metiers (level 5) defined by the +#' ordination method 'first species'. } \item{compClustersL5vsFirstGroupL5}{A +#' table giving the distribution of the logevents of each metier (level 5) +#' defined by the multivariate classification in the metiers (level 5) defined +#' by the ordination method 'first group'. } +#' \item{compFirstSpeciesL5vsFirstGroupL5}{A table giving the distribution of +#' the logevents of each metier (level 5) defined by the ordination method +#' 'first species' in the metiers (level 5) defined by the ordination method +#' 'first group'. } \item{compClustersL5vsClustersL7}{A table giving the +#' distribution of the logevents of each metier (level 5) defined by the +#' multivariate classification in the metiers (level 7) defined by the +#' multivariate classification. } +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @seealso \code{\link{getEflaloMetierLevel7}}, +#' \code{\link{selectMainSpecies}}, \code{\link{extractTableMainSpecies}}, +#' \code{\link{getMetierClusters}}, \code{\link{getTableAfterPCA}}, +#' \code{\link{getMetierClusters}} +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @examples +#' +#' +#' +#' +#' \dontrun{ +#' +#' data(eflalo) +#' +#' eflalo <- formatEflalo(eflalo) +#' +#' eflalo <- eflalo[eflalo$LE_GEAR=="OTB",] +#' +#' # note that output plots will be sent to getwd() +#' analysisName <- "metier_analysis_OTB" +#' +#' dat <- eflalo[,c("LE_ID",grep("EURO",colnames(eflalo),value=TRUE))] +#' names(dat)[-1] <- unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) +#' +#' explo <- selectMainSpecies(dat, analysisName, RunHAC=TRUE, DiagFlag=FALSE) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' Step1 <- extractTableMainSpecies(dat, explo$NamesMainSpeciesHAC, +#' paramTotal=95, paramLogevent=100) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' rowNamesSave <- row.names(Step1) +#' row.names(Step1) <- 1:nrow(Step1) +#' +#' # Run a PCA +#' Step2 <- getTableAfterPCA(Step1, analysisName, +#' pcaYesNo="pca", criterion="70percents") +#' +#' row.names(Step1) <- rowNamesSave +#' row.names(Step2) <- rowNamesSave +#' +#' # Define a metier for each logevent running the CLARA algorithm +#' Step3 <- getMetierClusters(Step1, Step2, analysisName, +#' methMetier="clara", param1="euclidean", param2=NULL) +#' +#' # Compare the differences between the metiers defined by CLARA +#' # and the metiers defined by two simple ordination methods +#' compMetiers <- compareToOrdination(dat, Step2, +#' clusters=Step3$clusters$clustering, +#' targetSpecies=Step3$targetSpecies) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' } +#' +#' +#' @export compareToOrdination +compareToOrdination=function(dat, Step2, clusters, targetSpecies){ + + # Load the table linking 3A-CODE (FAO CODE of species) to the species assemblage (level 5). + data(correspLevel7to5) + + # Load the table linking mixed metiers (composed by 2 simple metiers) to their official code of mixed metiers level 5 (FAO - 3 characters). 
+ data(correspMixedMetier) + + + ########################## + # FIRST SPECIES IN CATCH # + ########################## + + dat=as.matrix(dat[,2:ncol(dat)]) + p=ncol(dat) + firstSp=list() + nbLogFirstSp=list() + + # Determine the first species for each logevent (the catchest species of the logevent) + firstSp=apply(dat,1,function(x) names(which.max(x))) + metiersFirstSpeciesL7=firstSp + + # Calculate the number of logevent by first species (number of logevents by cluster) + nbLogFirstSp=sapply(unique(unlist(firstSp)),function(x) table(firstSp==x)[2]) + + names(nbLogFirstSp)=substr(names(nbLogFirstSp),1,3) + + # Number of logevents linked to each species from the method "first species in catch" + png("Number of logevents by ordination metier level7.png", width = 1200, height = 800) + barplot(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)], main="Number of logevents by ordination metier level7", xlab="Main species", ylab="Number of Logevents", las=3) + dev.off() + # idem without the species with the biggest number of logevents + png(paste("Number of logevents by ordination metier level7 without ",names(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)])[1],".png",sep=""), width = 1200, height = 800) + barplot(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)][-1], main=paste("Number of logevents by ordination metier level7 without",names(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)])[1],sep=" "), xlab="Main species", ylab="Number of Logevents", las=3) + dev.off() + + + # Names of species from the ordination method + nomEspOrdi=names(nbLogFirstSp) + nbEspOrdi=length(nomEspOrdi) + nomEspOrdi=nomEspOrdi[order(nomEspOrdi,decreasing=FALSE)] + # Initialization + numEspOrdiClarai=numeric() + nomEspOrdiPlot=character() + nbLogEspOrdiPlot=numeric() + nbClust=length(unique(clusters)) + + # For each cluster Clara + for(i in 1:nbClust){ + # Names of species from ordination method linked to the logevents of cluster Clara i + 
nomEspOrdiClarai=unique(unlist(firstSp[which(clusters==i)])) + # their number + numEspOrdiClarai=sapply(nomEspOrdiClarai, function(x) which(nomEspOrdi==x)) + + # Names of species to plot in the barplot of cluster i + nomEspOrdiPloti=rep("",nbEspOrdi) + nomEspOrdiPloti[numEspOrdiClarai]=nomEspOrdiClarai + nomEspOrdiPlot=rbind(nomEspOrdiPlot,nomEspOrdiPloti) + + # for the cluster i, number of logevents of cluster i linked to each species from ordination method + nbLogEspOrdiPloti=rep(0,nbEspOrdi) + nbLogEspOrdiPloti[numEspOrdiClarai]=sapply(nomEspOrdiClarai,function(x) rev(table(firstSp[which(clusters==i)]==x))[1]) + nbLogEspOrdiPlot=rbind(nbLogEspOrdiPlot,nbLogEspOrdiPloti) + } + colnames(nbLogEspOrdiPlot)=names(nbLogFirstSp)[order(names(nbLogFirstSp))] + rownames(nbLogEspOrdiPlot)=paste("nbLogFirstSpCluster",seq=1:nbClust,sep="") + + # Graphics + png("Ordination_metiersL7_by_cluster.png", width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(nbLogEspOrdiPlot[i,],names.arg=nomEspOrdiPlot[i,], las=3) + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Number of logevents linked to each metier from the ordination method 'first species' by cluster","\n","\n",sep="")) + dev.off() + + # Projections on the first factorial plans + nomEspOrdi=substr(names(nbLogFirstSp),1,3) + espOrdi=matrix() + espOrdi=rbind(names(nbLogFirstSp),seq(1:length(nbLogFirstSp))) + numFirstSp=firstSp + for(i in 1:length(nbLogFirstSp)){ + numFirstSp[which(firstSp==nomEspOrdi[i])]=espOrdi[2,which(espOrdi[1,]==nomEspOrdi[i])] + } + + # Projections on the first factorial plans + png("Ordination_Projections.png", width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(Step2[,1], Step2[,2], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 
1-2", xlab="axis 1", ylab="axis 2") + plot(Step2[,2], Step2[,3], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") + plot(Step2[,1], Step2[,3], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") + plot(Step2[,1], Step2[,4], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") + plot(Step2[,2], Step2[,4], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") + plot(Step2[,3], Step2[,4], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") + par(op) + dev.off() + + + # Determine the metier level 5 from the metier level 7 based on the first species in catch (logevent by logevent) + print("Please, be patient...") + metiersFirstSpeciesL5=lapply(as.list(firstSp), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0){print(paste(x," : unknown species, classed in \'FIF\' group",sep=""));"FIF"} + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"DCF_species_level5_COD"]) + metiersFirstSpeciesL5=unlist(lapply(metiersFirstSpeciesL5, function(x) as.character(x))) + + + + + ################################### + # FIRST GROUP OF SPECIES IN CATCH # + ################################### + + # Determine the metier of each logevent thanks to the first group of species in catch (level 5) of the logevent + datL5=dat + groupColSpecies=lapply(as.list(colnames(datL5)), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0){print(paste(x," 
: unknown species, classed in \'FIF\' group",sep=""));"FIF"} + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"DCF_species_level5_COD"]) + groupColSpecies=unlist(lapply(groupColSpecies, function(x) as.character(x))) + colnames(datL5)=groupColSpecies + nbC5=length(unique(colnames(datL5))) + tab=matrix(NA,nrow=nrow(datL5),ncol=nbC5) + colnames(tab)=unique(colnames(datL5)) + for(i in 1:nbC5){ + # Sub-data containing only one metier level 5. + subdati=datL5[,which(colnames(datL5)==(unique(colnames(datL5))[i]))] + if(length(which(colnames(datL5)==(unique(colnames(datL5))[i])))==1) tab[,i]=subdati + else + tab[,i]=apply(subdati,1,sum) + } + # Metiers (level 5) of each logevent found thanks to the first group of species in catch (level 5) + metiersFirstGroupL5=unique(colnames(datL5))[apply(tab,1,which.max)] + + + + + + + ############################################################### + # LEVELS 7 AND 5 FOR METIERS FROM MULTIVARIATE CLASSIFICATION # + ############################################################### + + # List of metiers (level 5.7) + listMetiersLevel57=list() + for (i in 1:nbClust){ + metiersClusteri=lapply(targetSpecies[[i]], function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0){print(paste(x," : unknown species, classed in \'FIF\' group",sep=""));"FIF"} + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"DCF_species_level5_COD"]) + metiersClusteri=as.character(unique(unlist(metiersClusteri))) + metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") + listMetiersLevel57[[i]]=metiersClusteri + } + + # Metiers (level 5.7) of each logevent found thanks to the getMetierClusters method + #(where mixed metiers labels are the aggregation of several simple metiers labels). + metiersClustersL5=unlist(sapply(clusters,function(x) listMetiersLevel57[x])) + + + # Correspondence of mixed metiers (composed by 2 simple metiers) to their official code of mixed metiers level 5 (FAO - 3 characters). 
+ listMixedMetiersLevel57=lapply(listMetiersLevel57, function(x) if(nchar(x)!=3 & is.element(x,correspMixedMetier[,1])){correspMixedMetier[which(correspMixedMetier[,"combMetiersL5"]==x),"mixedMetierL5"]} + else x ) + + # Metiers (level 5.7) of each logevent found thanks to the getMetierClusters method (with official labels for mixed metiers). + mixedMetiersClustersL5=unlist(sapply(clusters,function(x) listMixedMetiersLevel57[x])) + + + + + + ################# + # COMPARISONS # + ################# + + # Compare the metiers 'Clusters' (from getMetierClusters) (level 7) vs the metier 'First Species' in catch (level 7) + compClustersL7vsFirstSpeciesL7=table(clusters,metiersFirstSpeciesL7) + rownames(compClustersL7vsFirstSpeciesL7)=paste("Clust",seq=1:nbClust,sep="") + + # Compare the metiers 'Clusters' (level 5.7) vs the metiers 'First Species' (level 5) + clustStep3L5=clusters + compClustersL5vsFirstSpeciesL5=table(clustStep3L5,metiersFirstSpeciesL5) + rownames(compClustersL5vsFirstSpeciesL5)=unlist(listMixedMetiersLevel57) + + # Compare the metiers 'Clusters' (level 5.7) vs the metiers 'First Group' (level 5) + #compL5ClustStep3VsGroup=table(clustersStep3L5,metiersFirstGroupL5) !! pb regroupement des clusters qui correspondent au même groupe d'espèce niveau 5. 
+ clustStep3L5=clusters + compClustersL5vsFirstGroupL5=table(clustStep3L5,metiersFirstGroupL5) + rownames(compClustersL5vsFirstGroupL5)=unlist(listMixedMetiersLevel57) + + # Compare the metiers 'First Species' (level 5) vs the metiers 'First Group' (level 5) + compFirstSpeciesL5vsFirstGroupL5=table(metiersFirstSpeciesL5,metiersFirstGroupL5) + + + # Less attractive + # Compare the metiers 'Clusters' (level 5.7) vs the metiers 'Clusters' (level 7) + clustStep3L7=clusters + compClustersL5vsClustersL7=table(mixedMetiersClustersL5,clustStep3L7) + colnames(compClustersL5vsClustersL7)=paste("Clust",seq=1:nbClust) + + + + # Create csv tables + dfTables=data.frame() + dfTables=c("compClustersL7vsFirstSpeciesL7","compClustersL5vsFirstSpeciesL5", + "compClustersL5vsFirstGroupL5","compFirstSpeciesL5vsFirstGroupL5","compClustersL5vsClustersL7") + write.table(dfTables[1],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(compClustersL7vsFirstSpeciesL7,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(dfTables[2],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(compClustersL5vsFirstSpeciesL5,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(dfTables[3],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(compClustersL5vsFirstGroupL5,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(dfTables[4],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(compFirstSpeciesL5vsFirstGroupL5,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(dfTables[5],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(compClustersL5vsClustersL7,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + + print("Done.") + + return(list(nbLogFirstSp=nbLogFirstSp, + compClustersL7vsFirstSpeciesL7=compClustersL7vsFirstSpeciesL7, + compClustersL5vsFirstSpeciesL5=compClustersL5vsFirstSpeciesL5, + 
compClustersL5vsFirstGroupL5=compClustersL5vsFirstGroupL5, + compFirstSpeciesL5vsFirstGroupL5=compFirstSpeciesL5vsFirstGroupL5, + compClustersL5vsClustersL7=compClustersL5vsClustersL7)) +} diff --git a/vmstools/R/countPings.r b/vmstools/R/countPings.r index 508e22d..cb6af64 100644 --- a/vmstools/R/countPings.r +++ b/vmstools/R/countPings.r @@ -1,80 +1,118 @@ -countPings <- function(formula,tacsat,grid=NULL,by=NULL){ - - #Turn selected variabels into element list - form <- formula - if (form[[1]] != "~") - stop("Error: Formula must be one-sided.") - formc <- as.character(form[2]) - formc <- gsub(" ", "", formc) - if (!is.element(substring(formc, 1, 1), c("+", "-"))) - formc <- paste("+", formc, sep = "") - vars <- unlist(strsplit(formc, "[\\+\\-]")) - vars <- vars[vars != ""] - signs <- formc - for (i in 1:length(vars)) { - signs <- gsub(vars[i], "", signs) - } - signs <- unlist(strsplit(signs, "")) #Currently we do not use signs - - #Define which variables selected are column names, time variables or spatial variables - Vars <- vars[which(!vars %in% c("day","week","month","quarter","year","gridcell","icesrectangle","icesarea"))] - timeVars <- vars[which(vars %in% c("day","week","month","quarter","year"))] - spatVars <- vars[which(vars %in% c("gridcell","icesrectangle","icesarea"))] - - #Add time notation if you want this as output - if(length(timeVars)>0){ - if(!length(grep("SI_DATIM",colnames(tacsat)))>0) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - if("day" %in% timeVars & !"SI_DAY" %in% colnames(tacsat)){ tacsat$SI_DAY <- an(format(tacsat$SI_DATIM,format="%j"))}; if("day" %in% timeVars){ ; timeVars[which(timeVars=="day")] <- "SI_DAY"} - if("week" %in% timeVars & !"SI_WEEK" %in% colnames(tacsat)){ tacsat$SI_WEEK <- an(format(tacsat$SI_DATIM,format="%W"))}; if("week" %in% timeVars){ ; timeVars[which(timeVars=="week")] <- "SI_WEEK" } - if("month" %in% timeVars & !"SI_MONTH" %in% 
colnames(tacsat)){ tacsat$SI_MONTH <- an(format(tacsat$SI_DATIM,format="%m"))}; if("month" %in% timeVars){ ; timeVars[which(timeVars=="month")] <- "SI_MONTH"} - if("quarter" %in% timeVars & !"SI_QUART" %in% colnames(tacsat)){ tacsat$SI_QUART <- an(substr(quarters(tacsat$SI_DATIM),2,2))}; if("quarter" %in% timeVars){ ; timeVars[which(timeVars=="quarter")] <- "SI_QUART"} - if("year" %in% timeVars & !"SI_YEAR" %in% colnames(tacsat)){ tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))}; if("year" %in% timeVars){ ; timeVars[which(timeVars=="year")] <- "SI_YEAR" } - } - #Add spatial notation if you want this as output - if(length(spatVars)>0){ - if("gridcell" %in% spatVars & is.null(grid) == TRUE) stop("Grid needs to be specified to use the 'gridcell' option") - if("gridcell" %in% spatVars & is.null(grid) == FALSE){ - #Create coordinates of tacsat data - coords <- cbind(x=tacsat$SI_LONG,y=tacsat$SI_LATI) - sPDF <- SpatialPointsDataFrame(coords,data=tacsat) - #Turn grid into a spatial pixel dataframe - grid <- as(grid,"SpatialPixels"); - grid <- as(grid,"SpatialPixelsDataFrame") - #Overlay the two spatial frameworks to see to which gridcell each tacsat coordinate belongs - gridCellIndex <- over(as(sPDF,"SpatialPoints"),as(grid,"SpatialPixels")) - newCoords <- sPDF@coords[gridCellIndex,] - - tacsat$GR_LONG <- newCoords[,1] - tacsat$GR_LATI <- newCoords[,2] - spatVars[which(spatVars=="gridcell")] <- "GR_LONG"; spatVars <- c(spatVars,"GR_LATI") - } - if("icesrectangle" %in% spatVars){ - if(!"LE_RECT" %in% colnames(tacsat)) - tacsat$LE_RECT <- ICESrectangle(tacsat) - spatVars[which(spatVars=="icesrectangle")] <- "LE_RECT" - } - if("icesarea" %in% spatVars){ - if(!"LE_AREA" %in% colnames(tacsat)) - tacsat$LE_AREA <- ICESarea(tacsat) - spatVars[which(spatVars=="icesarea")] <- "LE_AREA" - } - } - - if(is.null(by)){ - tacsat$SUM <- 1 - } else { - tacsat$SUM <- tacsat[,by] - } - totVars <- c(Vars,timeVars,spatVars) - - #Do the counting of pings - for(iVars in 
1:length(totVars)) tacsat[,totVars[iVars]] <- af(ac(tacsat[,totVars[iVars]])) - DT <- data.table(tacsat) - eq <- c.listquote(totVars) - - res <- DT[,sum(SUM),by=eval(eq)] - setnames(res,colnames(res),c(totVars,"pings")) - #colnames(res) <- c(totVars,"pings") - - return(data.frame(res))} - +#' Count the number of VMS pings in a selection +#' +#' Count the number of VMS pings in any selection made in time and spatial +#' frame. +#' +#' Formula has form: ~Variable+timeVariable+anotherTimeVariable+spatialVariable +#' \cr +#' +#' options in formula for Variable: any column name of tacsat file \cr options +#' in formula for timeVariable: day, week, month, quarter, year \cr options in +#' formula for spatialVariable: icesrectangle, icesarea, gridcell \cr +#' +#' @param formula specify the elements you want to use as axis of the count +#' @param tacsat tacsat data file, possibly with additional columns +#' @param grid if in formulate 'gridcell' is chosen, a SpatialGrid must be +#' provided +#' @return Returns the matrix with counted pings by each specified variable +#' @note if Tacsat is a big file, the overlay function might fail resulting in +#' terminating the function +#' @author Niels T. Hintzen +#' @seealso \code{\link{createGrid}}, \code{\link{vmsGridCreate}} +#' @references Hintzen et al. 
2010 Improved estimation of trawling tracks using +#' cubic Hermite spline interpolation of position registration data, EU lot 2 +#' project +#' @examples +#' +#' data(tacsat) +#' +#' #make the tacsat file a bit smaller +#' tacsat <- tacsat[1:10000,] +#' +#' grid <- createGrid(range(tacsat$SI_LONG,na.rm=TRUE), +#' range(tacsat$SI_LATI,na.rm=TRUE),0.5,0.5,type="SpatialGrid") +#' +#' result <- countPings(~VE_REF+year+gridcell,tacsat,grid=grid) +#' result <- countPings(~VE_REF+week+year+icesrectangle,tacsat) +#' +#' @export countPings +countPings <- function(formula,tacsat,grid=NULL,by=NULL){ + + #Turn selected variabels into element list + form <- formula + if (form[[1]] != "~") + stop("Error: Formula must be one-sided.") + formc <- as.character(form[2]) + formc <- gsub(" ", "", formc) + if (!is.element(substring(formc, 1, 1), c("+", "-"))) + formc <- paste("+", formc, sep = "") + vars <- unlist(strsplit(formc, "[\\+\\-]")) + vars <- vars[vars != ""] + signs <- formc + for (i in 1:length(vars)) { + signs <- gsub(vars[i], "", signs) + } + signs <- unlist(strsplit(signs, "")) #Currently we do not use signs + + #Define which variables selected are column names, time variables or spatial variables + Vars <- vars[which(!vars %in% c("day","week","month","quarter","year","gridcell","icesrectangle","icesarea"))] + timeVars <- vars[which(vars %in% c("day","week","month","quarter","year"))] + spatVars <- vars[which(vars %in% c("gridcell","icesrectangle","icesarea"))] + + #Add time notation if you want this as output + if(length(timeVars)>0){ + if(!length(grep("SI_DATIM",colnames(tacsat)))>0) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + if("day" %in% timeVars & !"SI_DAY" %in% colnames(tacsat)){ tacsat$SI_DAY <- an(format(tacsat$SI_DATIM,format="%j"))}; if("day" %in% timeVars){ ; timeVars[which(timeVars=="day")] <- "SI_DAY"} + if("week" %in% timeVars & !"SI_WEEK" %in% colnames(tacsat)){ tacsat$SI_WEEK <- 
an(format(tacsat$SI_DATIM,format="%W"))}; if("week" %in% timeVars){ ; timeVars[which(timeVars=="week")] <- "SI_WEEK" } + if("month" %in% timeVars & !"SI_MONTH" %in% colnames(tacsat)){ tacsat$SI_MONTH <- an(format(tacsat$SI_DATIM,format="%m"))}; if("month" %in% timeVars){ ; timeVars[which(timeVars=="month")] <- "SI_MONTH"} + if("quarter" %in% timeVars & !"SI_QUART" %in% colnames(tacsat)){ tacsat$SI_QUART <- an(substr(quarters(tacsat$SI_DATIM),2,2))}; if("quarter" %in% timeVars){ ; timeVars[which(timeVars=="quarter")] <- "SI_QUART"} + if("year" %in% timeVars & !"SI_YEAR" %in% colnames(tacsat)){ tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))}; if("year" %in% timeVars){ ; timeVars[which(timeVars=="year")] <- "SI_YEAR" } + } + #Add spatial notation if you want this as output + if(length(spatVars)>0){ + if("gridcell" %in% spatVars & is.null(grid) == TRUE) stop("Grid needs to be specified to use the 'gridcell' option") + if("gridcell" %in% spatVars & is.null(grid) == FALSE){ + #Create coordinates of tacsat data + coords <- cbind(x=tacsat$SI_LONG,y=tacsat$SI_LATI) + sPDF <- SpatialPointsDataFrame(coords,data=tacsat) + #Turn grid into a spatial pixel dataframe + grid <- as(grid,"SpatialPixels"); + grid <- as(grid,"SpatialPixelsDataFrame") + #Overlay the two spatial frameworks to see to which gridcell each tacsat coordinate belongs + gridCellIndex <- over(as(sPDF,"SpatialPoints"),as(grid,"SpatialPixels")) + newCoords <- sPDF@coords[gridCellIndex,] + + tacsat$GR_LONG <- newCoords[,1] + tacsat$GR_LATI <- newCoords[,2] + spatVars[which(spatVars=="gridcell")] <- "GR_LONG"; spatVars <- c(spatVars,"GR_LATI") + } + if("icesrectangle" %in% spatVars){ + if(!"LE_RECT" %in% colnames(tacsat)) + tacsat$LE_RECT <- ICESrectangle(tacsat) + spatVars[which(spatVars=="icesrectangle")] <- "LE_RECT" + } + if("icesarea" %in% spatVars){ + if(!"LE_AREA" %in% colnames(tacsat)) + tacsat$LE_AREA <- ICESarea(tacsat) + spatVars[which(spatVars=="icesarea")] <- "LE_AREA" + } + } + + 
if(is.null(by)){ + tacsat$SUM <- 1 + } else { + tacsat$SUM <- tacsat[,by] + } + totVars <- c(Vars,timeVars,spatVars) + + #Do the counting of pings + for(iVars in 1:length(totVars)) tacsat[,totVars[iVars]] <- af(ac(tacsat[,totVars[iVars]])) + DT <- data.table(tacsat) + eq <- c.listquote(totVars) + + res <- DT[,sum(SUM),by=eval(eq)] + setnames(res,colnames(res),c(totVars,"pings")) + #colnames(res) <- c(totVars,"pings") + + return(data.frame(res))} + diff --git a/vmstools/R/createGrid.r b/vmstools/R/createGrid.r index 606e91b..5dd90c9 100644 --- a/vmstools/R/createGrid.r +++ b/vmstools/R/createGrid.r @@ -1,60 +1,98 @@ -`createGrid` <- -function(xrange - ,yrange - ,resx - ,resy - ,type="GridTopology" - ,exactBorder=FALSE - ){ - - require(sp) - if(exactBorder){ - xs <- seq(xrange[1]+resx/2,xrange[2]+resx,resx) - xborder <- range(xs) - ys <- seq(yrange[1]+resy/2,yrange[2]+resy,resy) - yborder <- range(ys) - } else { - roundDigitx <- getndp(resx) - roundDigity <- getndp(resy) - xs <- seq(xrange[1],xrange[2] +resx,resx) - xborder <- c((min(floor(xs*10^roundDigitx))/10^roundDigitx)- resx/2,(max(ceiling(xs*10^roundDigitx)) / 10^roundDigitx)+resx/2) - ys <- seq(yrange[1],yrange[2] +resy,resy) - yborder <- c((min(floor(ys*10^roundDigity))/10^roundDigity)- resy/2,(max(ceiling(ys*10^roundDigity)) / 10^roundDigity)+resy/2) - } - - grid <- GridTopology(c(xborder[1],yborder[1]),c(resx,resy),c(length(seq(xborder[1],xborder[2],resx)),length(seq(yborder[1],yborder[2],resy)))) - if(type=="SpatialGrid"){ - grid <- SpatialGrid(grid=grid); - } - if(type=="SpatialPixels"){ - grid <- GridTopology(c(xborder[1]-resx/2,yborder[1]-resy/2),c(resx,resy),c(length(seq(xborder[1]-resx/2,xborder[2]+resx/2,resx)),length(seq(yborder[1]-resy/2,yborder[2]+resy/2,resy)))) - grid <- SpatialGrid(grid=grid); - gridded(grid) = TRUE - grid <- as(grid,"SpatialPixels"); - } - if(type=="SpatialPixelsDataFrame"){ - grid <- 
GridTopology(c(xborder[1]-resx/2,yborder[1]-resy/2),c(resx,resy),c(length(seq(xborder[1]-resx/2,xborder[2]+resx/2,resx)),length(seq(yborder[1]-resy/2,yborder[2]+resy/2,resy)))) - grid <- SpatialGrid(grid=grid); - gridded(grid) = TRUE - grid <- as(grid,"SpatialPixels"); - sPDF <- as(grid,"SpatialPixelsDataFrame") - sPDF@data <- data.frame(rep(0,nrow(coordinates(sPDF)))) - colnames(sPDF@data) <- "data" - grid <- sPDF - } - if(type=="SpatialGridDataFrame"){ - grid <- SpatialGrid(grid=grid); - sPDF <- as(grid,"SpatialGridDataFrame") - sPDF@data <- data.frame(rep(0,nrow(coordinates(sPDF)))) - colnames(sPDF@data) <- "data" - grid <- sPDF - } - - if(!class(grid)=="GridTopology"){ - bb <- bbox(grid) - if(bb[1,1] > min(xrange) | max(xrange) > bb[1,2] | bb[2,1] > min(yrange) | max(yrange) > bb[2,2]) - stop("Dimensions of grid not large enough to span xranges and yranges (bug)") - } - - return(grid)} - +#' create a Grid (GridTopology: sp) depending on x and y limits and gridcell +#' size +#' +#' Function creates a grid with rounded (no decimal) begin and end longitude +#' and latitude based on the ranges given. It creates this grid with gridcell +#' size specified. +#' +#' The grid is created depending on the 'sp' package. The grid defined has +#' default class 'GridTopology'. +#' +#' @param xrange specify minimum and maximum longitude / x values which should +#' at least be in the grid +#' @param yrange specify minimum and maximum latitude / y values which should +#' at least be in the grid +#' @param resx gridcell size in degrees in the longitude / x direction +#' @param resy gridcell size in degrees in the latitude / y direction +#' @param type specify output class: +#' 'GridTopology','SpatialGrid','SpatialPixels', +#' 'SpatialPixelsDataFrame','SpatialGridDataFrame' +#' @param exactBorder Logical: if the specified resx and resy (lower-left +#' values) should be taken as exact +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{vmsGridCreate}} +#' @references EU Lot 2 project +#' @examples +#' +#' x <- seq(-4.999,5,length.out=10) +#' y <- seq(50.002,55,length.out=10) +#' xrange <- range(x,na.rm=TRUE) +#' yrange <- range(y,na.rm=TRUE) +#' +#' #returns a grid with 1001 cells in the x-direction and 101 in the y-direction +#' Grid <- createGrid(xrange,yrange,0.01,0.05,type="SpatialGrid") +#' bbox(Grid) +#' Grid <- createGrid(xrange,yrange,0.01,0.05,type="SpatialGrid",exactBorder=TRUE) +#' bbox(Grid) +#' +#' @export createGrid +`createGrid` <- +function(xrange + ,yrange + ,resx + ,resy + ,type="GridTopology" + ,exactBorder=FALSE + ){ + + require(sp) + if(exactBorder){ + xs <- seq(xrange[1]+resx/2,xrange[2]+resx,resx) + xborder <- range(xs) + ys <- seq(yrange[1]+resy/2,yrange[2]+resy,resy) + yborder <- range(ys) + } else { + roundDigitx <- getndp(resx) + roundDigity <- getndp(resy) + xs <- seq(xrange[1],xrange[2] +resx,resx) + xborder <- c((min(floor(xs*10^roundDigitx))/10^roundDigitx)- resx/2,(max(ceiling(xs*10^roundDigitx)) / 10^roundDigitx)+resx/2) + ys <- seq(yrange[1],yrange[2] +resy,resy) + yborder <- c((min(floor(ys*10^roundDigity))/10^roundDigity)- resy/2,(max(ceiling(ys*10^roundDigity)) / 10^roundDigity)+resy/2) + } + + grid <- GridTopology(c(xborder[1],yborder[1]),c(resx,resy),c(length(seq(xborder[1],xborder[2],resx)),length(seq(yborder[1],yborder[2],resy)))) + if(type=="SpatialGrid"){ + grid <- SpatialGrid(grid=grid); + } + if(type=="SpatialPixels"){ + grid <- GridTopology(c(xborder[1]-resx/2,yborder[1]-resy/2),c(resx,resy),c(length(seq(xborder[1]-resx/2,xborder[2]+resx/2,resx)),length(seq(yborder[1]-resy/2,yborder[2]+resy/2,resy)))) + grid <- SpatialGrid(grid=grid); + gridded(grid) = TRUE + grid <- as(grid,"SpatialPixels"); + } + if(type=="SpatialPixelsDataFrame"){ + grid <- GridTopology(c(xborder[1]-resx/2,yborder[1]-resy/2),c(resx,resy),c(length(seq(xborder[1]-resx/2,xborder[2]+resx/2,resx)),length(seq(yborder[1]-resy/2,yborder[2]+resy/2,resy)))) + 
grid <- SpatialGrid(grid=grid); +    gridded(grid) = TRUE +    grid          <- as(grid,"SpatialPixels"); +    sPDF          <- as(grid,"SpatialPixelsDataFrame") +    sPDF@data     <- data.frame(rep(0,nrow(coordinates(sPDF)))) +    colnames(sPDF@data) <- "data" +    grid          <- sPDF +  } +  if(type=="SpatialGridDataFrame"){ +    grid          <- SpatialGrid(grid=grid); +    sPDF          <- as(grid,"SpatialGridDataFrame") +    sPDF@data     <- data.frame(rep(0,nrow(coordinates(sPDF)))) +    colnames(sPDF@data) <- "data" +    grid          <- sPDF +  } + +  if(!class(grid)=="GridTopology"){ +    bb  <- bbox(grid) +    if(bb[1,1] > min(xrange) | max(xrange) > bb[1,2] | bb[2,1] > min(yrange) | max(yrange) > bb[2,2]) +      stop("Dimensions of grid not large enough to span xranges and yranges (bug)") +  } + +  return(grid)} + diff --git a/vmstools/R/degree2Km.R b/vmstools/R/degree2Km.R index 26992e2..3e83959 100644 --- a/vmstools/R/degree2Km.R +++ b/vmstools/R/degree2Km.R @@ -1,12 +1,35 @@ -`degree2Km` <- -function(lon,lat,degree){ -                    x1 <- lon -                    y1 <- lat - -                    a <- cos(y1*pi/180)*cos(y1*pi/180)*sin((1*pi/180)/2)*sin((1*pi/180)/2); -                    c <- 2*atan2(sqrt(a),sqrt(1-a)); -                    R <- 6371; -                    dx1 <- R*c - -                  return(dx1 * degree)} - +#' Compute distance from degrees into kilometers +#' +#' Function transforms the distance expressed in degrees into kilometers. This is +#' based on the GPS location of a point. +#' +#' +#' @param lon Longitude of the GPS position +#' @param lat Latitude of the GPS position +#' @param degree Value in degrees to turn into Km +#' @note Computation of Km is approximation based on the Haversine formula +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{distance}}, \code{\link{km2Degree}}, +#' \code{\link{lonLatRatio}} +#' @references EU lot 2 project +#' @examples +#' +#' lon <- -4 +#' lat <- 50 +#' degree <- 1.601833 +#' +#' degree2Km(lon,lat,degree) #114.4897km +#' +#' @export degree2Km +`degree2Km` <- +function(lon,lat,degree){ +                    x1 <- lon +                    y1 <- lat + +                    a <- cos(y1*pi/180)*cos(y1*pi/180)*sin((1*pi/180)/2)*sin((1*pi/180)/2); +                    c <- 2*atan2(sqrt(a),sqrt(1-a)); +                    R <- 6371; +                    dx1 <- R*c + +                  return(dx1 * degree)} + diff --git a/vmstools/R/destFromBearing.r b/vmstools/R/destFromBearing.r index 6218a37..f19b745 100644 --- a/vmstools/R/destFromBearing.r +++ b/vmstools/R/destFromBearing.r @@ -1,15 +1,36 @@ -destFromBearing <- function(lon,lat,bearing,distance){ - -  pd    <- pi/180 -  dist  <- (distance/6371) -  y1    <- lat * pd -  x1    <- lon * pd -  bear  <- bearing * pd - -  y2  <- asin(sin(y1) * cos(dist) + cos(y1) * sin(dist) * cos(bear)) -  x2  <- x1 + atan2(sin(bear) * sin(dist) * cos(y1),cos(dist) - sin(y1) * sin(y2)) - -  x2  <- (x2 + 3*pi) %% (2*pi) - pi -  y2  <- y2 / pd -  x2  <- x2 / pd -  return(cbind(x2,y2))} \ No newline at end of file +#' Find destination from point of origin given bearing and distance +#' +#' Find destination from point of origin given bearing and distance +#' +#' +#' @param lon Longitude of origin +#' @param lat Latitude of origin +#' @param bearing Bearing to destination +#' @param distance Distance to cover to destination in km +#' @return Returns the destination point(s) as matrix +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{addWidth}} +#' @references EU Lot 2 project +#' @examples +#' +#' res <- destFromBearing(rep(2.5,10),rep(51.5,10), +#' runif(10,0,360),runif(10,0,0.1)) +#' plot(res[,1],res[,2]) +#' points(2.5,51.5,cex=1.1,pch=19) +#' +#' @export destFromBearing +destFromBearing <- function(lon,lat,bearing,distance){ + + pd <- pi/180 + dist <- (distance/6371) + y1 <- lat * pd + x1 <- lon * pd + bear <- bearing * pd + + y2 <- asin(sin(y1) * cos(dist) + cos(y1) * sin(dist) * cos(bear)) + x2 <- x1 + atan2(sin(bear) * sin(dist) * cos(y1),cos(dist) - sin(y1) * sin(y2)) + + x2 <- (x2 + 3*pi) %% (2*pi) - pi + y2 <- y2 / pd + x2 <- x2 / pd + return(cbind(x2,y2))} diff --git a/vmstools/R/diffInter.R b/vmstools/R/diffInter.R index 4fa7b8f..a1dfabc 100644 --- a/vmstools/R/diffInter.R +++ b/vmstools/R/diffInter.R @@ -1,43 +1,99 @@ -diffInter <- function(interpolation - ,reference){ - - - #Get the starting and ending positions of the interpolations - interIdx <- matrix(unlist(lapply(interpolation,function(x){return(x[1,])})),ncol=2,nrow=length(interpolation),dimnames=list(interpolation=1:length(interpolation),c("x","y")),byrow=TRUE) - #Store the deviations from the interpolation and reference dataset - storeDiffs <- matrix(NA,nrow=length(interpolation),ncol=5,dimnames=list(1:length(interpolation),c("mean","logmean","sd","logsd","sum"))) - - #Loop over all the interpolations - for(i in 1:length(interpolation)){ - int <- interpolation[[i]] - ref <- reference[seq(interIdx[i,1],interIdx[i,2],1),] - - #Calculate the difference between each datapoint - distInt <- distance(int[3:dim(int)[1],1], int[3:dim(int)[1],2], int[2:(dim(int)[1]-1),1], int[2:(dim(int)[1]-1),2]) - distRef <- distance(ref$SI_LONG[2:dim(ref)[1]], ref$SI_LATI[2:dim(ref)[1]], ref$SI_LONG[1:(dim(ref)[1]-1)], ref$SI_LATI[1:(dim(ref)[1]-1)]) - #To calculate the total distance travelled, sum all individual distances - cumsumInt <- cumsum(distInt) - cumsumRef <- cumsum(distRef) - #To select the points on 
the interpolated track to match up with the reference points we do the following - #- First rescale the distance travelled within the reference set to equal the total distance in the interpolated set - #- Than substract the distance travelled in the reference set for each point from the distance travelled in the interpolated set, between all points - #- Search for the point in the interpolated set that comes closest to that distance travelled - # By doing this, we assume that an equal distance of the total in the reference set and in the interpolated set is travelled - # This enables the user too to make use of irregular reference set polling rates, as matching points are found based on distance travelled - # As well, if vessels speed between two points and slow down between other, this does match up better with the interpolated set - matchRefDist <- c(cumsumRef / (rev(cumsumRef)[1]/rev(cumsumInt)[1])) - matchRef <- c(1,apply(abs(outer(matchRefDist,cumsumInt,"-")),1,which.min)+1) - - matchPx <- int[matchRef+1,1] - matchPy <- int[matchRef+1,2] - - #Calculate the distance between the reference points and the points on the interpolated track that are matched - res <- distance(matchPx,matchPy,ref$SI_LONG,ref$SI_LATI) - #Store the differences for each interpolation - storeDiffs[i,]<- c( mean(res[-c(1,length(res))],na.rm=TRUE), - exp(mean(log(res[-c(1,length(res))])[which(is.finite(log(res[-c(1,length(res))]))==TRUE)],na.rm=TRUE)), - sd(res[-c(1,length(res))],na.rm=TRUE), - exp(sd(log(res[-c(1,length(res))])[which(is.finite(log(res[-c(1,length(res))]))==TRUE)],na.rm=TRUE)), - sum(res[-c(1,length(res))],na.rm=TRUE)) - } -return(storeDiffs)} +#' Calculate distance between interpolation and reference set +#' +#' Calculate for each interpolation the distance at fixed points, depending on +#' the number of points present in the reference set, between the interpolated +#' trajectory and the reference trajectory. 
This indicates the deviation of the +#' interpolated set to the reference set. +#' +#' Each interpolation has a start and end point which are similar to the start +#' and end point of the reference set. In between these two points, the +#' reference set can have more in-between points which are not used for +#' interpolation, depending on the interval chosen in the 'interpolateVMS' +#' function. These points are matched up with the points on the interpolated +#' track. For each of these reference in-between points, it is calculated which +#' fraction of the total distance was travelled. The same fraction is applied +#' to the interpolated track, and the point that matches with this distance is +#' linked to the reference set point. The distance function is used to +#' calculate the distance between the reference and interpolated dataset. +#' +#' The mean, logmean, standard deviation, sdlog and total sum of these +#' distances is returned as a matrix. +#' +#' @param interpolation Interpolated dataset as output from the function +#' 'interpolateVMS' +#' @param reference Original higher resolution VMS dataset +#' @author Niels T. Hintzen +#' @seealso \code{\link{distance}}, \code{\link{distanceInterpolation}}, +#' \code{\link{distanceVMS}} +#' @references Hintzen et al. 
2010 Improved estimation of trawling tracks using +#' cubic Hermite spline interpolation of position registration data, EU lot 2 +#' project +#' @examples +#' +#' \dontrun{ +#' data(VMShf) +#' +#' #-Put the data in the right format +#' VMShf$SI_DATE <- format(as.Date(VMShf$date),"%d/%m/%Y") +#' VMShf$SI_TIME <- format(VMShf$date, "%H:%M") +#' +#' colnames(VMShf) <- c("VE_REF","SI_LATI","SI_LONG","SI_SP", +#' "SI_HE","SI_DATIM","SI_DATE","SI_TIME") +#' +#' #-Sort the data and remove non-fishing pings +#' VMShf <- sortTacsat(VMShf) +#' VMShf <- filterTacsat(VMShf,c(2,6),NULL,T) +#' +#' interpolation <- interpolateTacsat(VMShf,interval=120,margin=10,res=100, +#' method="cHs",params=list(fm=0.3,distscale=20, +#' sigline=0.2,st=c(2,6)),headingAdjustment=0) +#' +#' #Returns a matrix with 21 rows (each row represents 1 interpolation) +#' # and 5 measures +#' cHs <- diffInter(interpolation,VMShf) +#' } +#' +#' @export diffInter +diffInter <- function(interpolation + ,reference){ + + + #Get the starting and ending positions of the interpolations + interIdx <- matrix(unlist(lapply(interpolation,function(x){return(x[1,])})),ncol=2,nrow=length(interpolation),dimnames=list(interpolation=1:length(interpolation),c("x","y")),byrow=TRUE) + #Store the deviations from the interpolation and reference dataset + storeDiffs <- matrix(NA,nrow=length(interpolation),ncol=5,dimnames=list(1:length(interpolation),c("mean","logmean","sd","logsd","sum"))) + + #Loop over all the interpolations + for(i in 1:length(interpolation)){ + int <- interpolation[[i]] + ref <- reference[seq(interIdx[i,1],interIdx[i,2],1),] + + #Calculate the difference between each datapoint + distInt <- distance(int[3:dim(int)[1],1], int[3:dim(int)[1],2], int[2:(dim(int)[1]-1),1], int[2:(dim(int)[1]-1),2]) + distRef <- distance(ref$SI_LONG[2:dim(ref)[1]], ref$SI_LATI[2:dim(ref)[1]], ref$SI_LONG[1:(dim(ref)[1]-1)], ref$SI_LATI[1:(dim(ref)[1]-1)]) + #To calculate the total distance travelled, sum all individual distances + 
cumsumInt <- cumsum(distInt) + cumsumRef <- cumsum(distRef) + #To select the points on the interpolated track to match up with the reference points we do the following + #- First rescale the distance travelled within the reference set to equal the total distance in the interpolated set + #- Than substract the distance travelled in the reference set for each point from the distance travelled in the interpolated set, between all points + #- Search for the point in the interpolated set that comes closest to that distance travelled + # By doing this, we assume that an equal distance of the total in the reference set and in the interpolated set is travelled + # This enables the user too to make use of irregular reference set polling rates, as matching points are found based on distance travelled + # As well, if vessels speed between two points and slow down between other, this does match up better with the interpolated set + matchRefDist <- c(cumsumRef / (rev(cumsumRef)[1]/rev(cumsumInt)[1])) + matchRef <- c(1,apply(abs(outer(matchRefDist,cumsumInt,"-")),1,which.min)+1) + + matchPx <- int[matchRef+1,1] + matchPy <- int[matchRef+1,2] + + #Calculate the distance between the reference points and the points on the interpolated track that are matched + res <- distance(matchPx,matchPy,ref$SI_LONG,ref$SI_LATI) + #Store the differences for each interpolation + storeDiffs[i,]<- c( mean(res[-c(1,length(res))],na.rm=TRUE), + exp(mean(log(res[-c(1,length(res))])[which(is.finite(log(res[-c(1,length(res))]))==TRUE)],na.rm=TRUE)), + sd(res[-c(1,length(res))],na.rm=TRUE), + exp(sd(log(res[-c(1,length(res))])[which(is.finite(log(res[-c(1,length(res))]))==TRUE)],na.rm=TRUE)), + sum(res[-c(1,length(res))],na.rm=TRUE)) + } +return(storeDiffs)} diff --git a/vmstools/R/distance.R b/vmstools/R/distance.R index f6c76ab..ce7d2e1 100644 --- a/vmstools/R/distance.R +++ b/vmstools/R/distance.R @@ -1,13 +1,39 @@ -`distance` <- -function(lon,lat,lonRef,latRef){ - - pd <- pi/180 - - a1<- 
sin(((latRef-lat)*pd)/2) - a2<- cos(lat*pd) - a3<- cos(latRef*pd) - a4<- sin(((lonRef-lon)*pd)/2) - a <- a1*a1+a2*a3*a4*a4 - - c <- 2*atan2(sqrt(a),sqrt(1-a)); - return(6371*c)} \ No newline at end of file +#' Compute distance between two points on a sphere (approximation of the earth) +#' +#' Compute the distance between two GPS locations defined in longitude and +#' latitude notation on the earth. The earth is assumed to have a perfect +#' spherical shape. Distance is returned in km. +#' +#' +#' @param lon Longitude of point 2 +#' @param lat Latitude of point 2 +#' @param lonRef Longitude of point 1 +#' @param latRef Latitude of point 1 +#' @author Niels T. Hintzen +#' @seealso \code{\link{km2Degree}}, \code{\link{degree2Km}}, +#' \code{\link{lonLatRatio}} +#' @references EU Lot 2 project, based on the Haversine formula, see also: +#' Hintzen et al. 2010 Fisheries Research +#' @examples +#' +#' lon <- -4 +#' lat <- 50 +#' lonRef <- -4.2 +#' latRef <- 51 +#' +#' distance(lon,lat,lonRef,latRef) #112.09 +#' +#' @export distance +`distance` <- +function(lon,lat,lonRef,latRef){ + + pd <- pi/180 + + a1<- sin(((latRef-lat)*pd)/2) + a2<- cos(lat*pd) + a3<- cos(latRef*pd) + a4<- sin(((lonRef-lon)*pd)/2) + a <- a1*a1+a2*a3*a4*a4 + + c <- 2*atan2(sqrt(a),sqrt(1-a)); + return(6371*c)} diff --git a/vmstools/R/distanceInterpolation.R b/vmstools/R/distanceInterpolation.R index 190c16c..0dd0644 100644 --- a/vmstools/R/distanceInterpolation.R +++ b/vmstools/R/distanceInterpolation.R @@ -1,14 +1,54 @@ -distanceInterpolation <- function(interpolation){ - - res <- unlist(lapply(interpolation,function(x){ - dims <- dim(x) - res <- distance(x[3:dims[1],1],x[3:dims[1],2],x[2:(dims[1]-1),1],x[2:(dims[1]-1),2]) - return(sum(res,na.rm=TRUE))})) - - return(res)} - - - - - - \ No newline at end of file +#' Calculate the distance (in km) from an interpolated dataset +#' +#' For each interpolation between two succeeding datapoints, the distance +#' travelled is computed based on the function 
'distance()'. This function is +#' an easy to use wrapper for a whole interpolated dataset +#' +#' On default, each interpolation consists of 100 points. Hence, 99 distances +#' are computed for each interpolation. Returned are 5 +#' +#' @param interpolation Interpolated dataset as output from the function +#' 'interpolateVMS' +#' @author Niels T. Hintzen +#' @seealso \code{\link{distance}}, \code{\link{distanceTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' #Speed threshold points (two values), NULL means use all points +#' st <- c(2,6) +#' #Remove duplicate records in VMS dataset +#' remDup <- TRUE +#' +#' #Sort the tacsat data +#' tacsat <- sortTacsat(tacsat) +#' tacsat <- tacsat[1:1000,] +#' +#' #Filter the tacsat data +#' tacsat <- filterTacsat(tacsat,st,NULL,remDup) +#' +#' interpolation <- interpolateTacsat(tacsat,interval=120,margin=10, +#' res=100,method="cHs",params=list(fm=0.3,distscale=20, +#' sigline=0.2,st=c(2,6)),headingAdjustment=0) +#' #Number of interpolations: +#' length(interpolation) +#' #Calculate distance: +#' #Returns 159 values, which represents the distance travelled in +#' #each of the 159 interpolations +#' distanceInterpolation(interpolation) +#' +#' @export distanceInterpolation +distanceInterpolation <- function(interpolation){ + + res <- unlist(lapply(interpolation,function(x){ + dims <- dim(x) + res <- distance(x[3:dims[1],1],x[3:dims[1],2],x[2:(dims[1]-1),1],x[2:(dims[1]-1),2]) + return(sum(res,na.rm=TRUE))})) + + return(res)} + + + + + + diff --git a/vmstools/R/distanceTacsat.r b/vmstools/R/distanceTacsat.r index 3c6871e..de0f4f1 100644 --- a/vmstools/R/distanceTacsat.r +++ b/vmstools/R/distanceTacsat.r @@ -1,11 +1,47 @@ -distanceTacsat <- function(tacsat,index){ - - res <- unlist(lapply(as.list(1:dim(index)[1]),function(x){ - iS <- index[x,1] - iE <- index[x,2] - iL <- iE-iS+1 - res <- distance(tacsat[iS:iE,]$SI_LONG[2:iL],tacsat[iS:iE,]$SI_LATI[2:iL], - 
tacsat[iS:iE,]$SI_LONG[1:(iL-1)],tacsat[iS:iE,]$SI_LATI[1:(iL-1)]) - return(sum(res,na.rm=TRUE))})) - - return(res)} \ No newline at end of file +#' Calculate the distance (in km) from a Tacsat dataset +#' +#' Calculates the distance of a Tacsat dataset with specification of succeeding +#' datapoints. Distance is only calculated between these specified datapoints +#' +#' index is designed as a matrix where rows represent succeeding datapoints, +#' column 1 represent start points, column 2 represent end points. +#' +#' @param tacsat tacsat (normal or high ping rate) dataset +#' @param index Matrix with specification of succeeding datapoints (see details +#' for format) +#' @author Niels T. Hintzen +#' @seealso \code{\link{distance}}, \code{\link{distanceInterpolation}}, +#' \code{\link{diffInter}} +#' @references EU lot 2 project +#' @examples +#' +#' +#' data(tacsat) +#' #Speed threshold points (two values), NULL means use all points +#' st <- c(2,6) +#' #Remove duplicate records in VMS dataset +#' remDup <- TRUE +#' +#' #Sort the VMS data +#' tacsat <- sortTacsat(tacsat) +#' tacsat <- tacsat[1:1000,] +#' +#' #Filter the VMS data +#' tacsat <- filterTacsat(tacsat,st,NULL,remDup) +#' +#' distanceTacsat(tacsat,matrix(c(2,3,3,4),nrow=2,ncol=2, +#' dimnames=list(1:2,c("startpoint","endpoint")))) +#' #6.335944 14.847291 +#' +#' @export distanceTacsat +distanceTacsat <- function(tacsat,index){ + + res <- unlist(lapply(as.list(1:dim(index)[1]),function(x){ + iS <- index[x,1] + iE <- index[x,2] + iL <- iE-iS+1 + res <- distance(tacsat[iS:iE,]$SI_LONG[2:iL],tacsat[iS:iE,]$SI_LATI[2:iL], + tacsat[iS:iE,]$SI_LONG[1:(iL-1)],tacsat[iS:iE,]$SI_LATI[1:(iL-1)]) + return(sum(res,na.rm=TRUE))})) + + return(res)} diff --git a/vmstools/R/effort.r b/vmstools/R/effort.r index 98d1ddd..fe5e6be 100644 --- a/vmstools/R/effort.r +++ b/vmstools/R/effort.r @@ -1,3 +1,48 @@ +#' Calculate effort of tacsat or eflalo dataset +#' +#' Calculate effort (in hours, days, ...) 
based on tacsat or eflalo dataset for +#' different combination of settings +#' +#' if 'byRow' is selected, no other elements can be added +#' +#' @param x Either eflalo or tacsat format data +#' @param by Vector including the elements to calculate effort by. Options are: +#' byRow, VE_REF, FT_REF, LE_GEAR, SI_DAY, SI_WEEK, SI_MONTH, SI_QUARTER, +#' SI_YEAR, LE_RECT, LE_AREA +#' @param unit Unit must be in 'secs,mins,hours,days or weeks' +#' @param weight Only relevant for tacsat: weight to apply to calculation of +#' mean interval rate towards and away from ping +#' @param fill.na Only relevant for tacsat: If interval rate cannot be +#' calculated based on default or provided weight, take closest alternative to +#' provide an interval rate +#' @author Niels T. Hintzen +#' @seealso \code{\link{raiseTacsat}} +#' @examples +#' +#' data(eflalo) +#' data(tacsat) +#' #-Remove duplicated records from tacsat +#' myf <- paste(tacsat$VE_REF,tacsat$SI_LATI,tacsat$SI_LONG, +#' tacsat$SI_DATE,tacsat$SI_TIME); +#' tacsat <- tacsat[!duplicated(myf),]; +#' +#' #- Try some out for eflalo +#' a1 <- effort(eflalo,by=c("FT_REF"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(a1$EFFORT) +#' a2 <- effort(eflalo,by=c("SI_MONTH"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(a2$EFFORT) +#' a3 <- effort(eflalo,by=c("VE_REF","SI_MONTH"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(a3$EFFORT) +#' a4 <- effort(eflalo,by=c("VE_REF","SI_QUARTER","LE_GEAR"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(a4$EFFORT) +#' a5 <- effort(eflalo,by=c("byRow"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(a5$EFFORT) +#' a6 <- effort(eflalo,by=c("SI_DAY","LE_GEAR"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(a6$EFFORT) +#' +#' #- Try some out for tacsat +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)] +#' b1 <- effort(tacsatp,by=c("FT_REF"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(b1$EFFORT) +#' 
b2 <- effort(tacsatp,by=c("SI_MONTH"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(b2$EFFORT) +#' b3 <- effort(tacsatp,by=c("VE_REF","SI_MONTH"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(b3$EFFORT) +#' b4 <- effort(tacsatp,by=c("VE_REF","SI_QUARTER","LE_GEAR"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(b4$EFFORT) +#' b5 <- effort(tacsatp,by=c("byRow"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(b5$EFFORT,na.rm=T) +#' @export effort effort <- function(x,by="FT_REF",unit="hours",weight=c(0.5,0.5),fill.na=FALSE){ dattype <- ifelse(all(c("SI_LATI","SI_LONG") %in% colnames(x)),"tacsat","eflalo") diff --git a/vmstools/R/eflalo2Pings.r b/vmstools/R/eflalo2Pings.r index a2a510a..41d2a16 100644 --- a/vmstools/R/eflalo2Pings.r +++ b/vmstools/R/eflalo2Pings.r @@ -1,32 +1,52 @@ - eflalo2Pings <- function(eflalo,tacsat,pings,vars,eflaloCol,remainTacsat,by=NULL){ - #- Merge landings and values to get unique eflalo set given 'totVars' - for(iVars in 1:length(vars)){ - eflalo[,vars[iVars]] <- af(ac(eflalo[,vars[iVars]])) - tacsat[,vars[iVars]] <- af(ac(tacsat[,vars[iVars]])) - } - - DT <- data.table(eflalo) - eq1 <- c.listquote(paste("sum(",colnames(eflalo[,kgeur(colnames(eflalo))]),",na.rm=TRUE)",sep="")) - eq2 <- c.listquote(vars) - - eflalo <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(eflalo) <- c(vars,eflaloCol) - eflalo$ID <- 1:nrow(eflalo) - - #- Merge eflalo to pings to get number of pings per eflalo record - byPing <- merge(eflalo,data.frame(pings),by=vars,all=FALSE) - byTacsat <- merge(tacsat,byPing,by=vars,all=FALSE) - if(is.null(by)==FALSE) - byTacsat$pings <- byTacsat$pings / byTacsat[,by] - - try(print(paste("kg in eflalo",round(sum(byPing [,kgeur(colnames(byPing))]))))) - try(print(paste("kg in merged tacsat",round(sum(sweep(byTacsat[,kgeur(colnames(byTacsat))],1,byTacsat$pings,"/")))))) - - #- Bookkeeping which tacsat ID's have been merged and which have not yet been merged - remainTacsat <- remainTacsat[which(!remainTacsat %in% 
byTacsat$ID.x)] - - #- Bookkeeping which eflalo catches have been merged and which have not yet been merged - idx <- sort(unique(byPing$ID)) - try(print(paste("kg removed from eflalo",round(sum(eflalo[idx,kgeur(colnames(eflalo))]))))) - eflalo[idx,kgeur(colnames(eflalo))] <- 0 - - return(list(eflalo=eflalo,tacsat=byTacsat,remainTacsat=remainTacsat))} \ No newline at end of file +#' Merge eflalo to tacsat pings +#' +#' Internal function of splitAmongPings to merge eflalo landings or values to +#' tacsat pings +#' +#' +#' @param eflalo eflalo dataset +#' @param tacsat tacsat dataset +#' @param pings number of pings by variable +#' @param vars variable to merge eflalo to tacsat +#' @param eflaloCol column names of eflalo +#' @param remainTacsat number of tacsat pings that have not been merged yet +#' @return Returns a list of the eflalo dataset, but without the landings and +#' values that have been merged, returns the merged tacsat dataset and returns +#' the number tacsat pings that have not been merged yet +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{splitAmongPings}}, \code{\link{mergeEflalo2Tacsat}}, +#' \code{\link{mergeEflalo2Pings}} +#' @references EU Lot 2 project +#' @export eflalo2Pings + eflalo2Pings <- function(eflalo,tacsat,pings,vars,eflaloCol,remainTacsat,by=NULL){ + #- Merge landings and values to get unique eflalo set given 'totVars' + for(iVars in 1:length(vars)){ + eflalo[,vars[iVars]] <- af(ac(eflalo[,vars[iVars]])) + tacsat[,vars[iVars]] <- af(ac(tacsat[,vars[iVars]])) + } + + DT <- data.table(eflalo) + eq1 <- c.listquote(paste("sum(",colnames(eflalo[,kgeur(colnames(eflalo))]),",na.rm=TRUE)",sep="")) + eq2 <- c.listquote(vars) + + eflalo <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(eflalo) <- c(vars,eflaloCol) + eflalo$ID <- 1:nrow(eflalo) + + #- Merge eflalo to pings to get number of pings per eflalo record + byPing <- merge(eflalo,data.frame(pings),by=vars,all=FALSE) + byTacsat <- merge(tacsat,byPing,by=vars,all=FALSE) + if(is.null(by)==FALSE) + byTacsat$pings <- byTacsat$pings / byTacsat[,by] + + try(print(paste("kg in eflalo",round(sum(byPing [,kgeur(colnames(byPing))]))))) + try(print(paste("kg in merged tacsat",round(sum(sweep(byTacsat[,kgeur(colnames(byTacsat))],1,byTacsat$pings,"/")))))) + + #- Bookkeeping which tacsat ID's have been merged and which have not yet been merged + remainTacsat <- remainTacsat[which(!remainTacsat %in% byTacsat$ID.x)] + + #- Bookkeeping which eflalo catches have been merged and which have not yet been merged + idx <- sort(unique(byPing$ID)) + try(print(paste("kg removed from eflalo",round(sum(eflalo[idx,kgeur(colnames(eflalo))]))))) + eflalo[idx,kgeur(colnames(eflalo))] <- 0 + + return(list(eflalo=eflalo,tacsat=byTacsat,remainTacsat=remainTacsat))} diff --git a/vmstools/R/eflalo2relational.r b/vmstools/R/eflalo2relational.r index 1a8eb11..d108901 100644 --- a/vmstools/R/eflalo2relational.r +++ b/vmstools/R/eflalo2relational.r @@ -1,28 +1,46 @@ -eflalo2relational <- function(x){ - idxkg <- grep("KG",colnames(x)) - 
idxeuro <- grep("EURO",colnames(x)) - selectColsKG <- apply(x[,idxkg], 1,FUN=function(y){idx <- which(y >0); - return(y[idx])}) - selectColsEURO <- apply(x[,idxeuro],1,FUN=function(y){idx <- which(y >0); - return(y[idx])}) - - - idxNonkgeur <- which(!1:length(colnames(x)) %in% kgeur(colnames(x))) - xRelationalList <- lapply(as.list(1:length(selectColsKG)),function(y){ - newX <- data.frame(x[y,idxNonkgeur]) - newX$LE_SP <- strsplit(names(selectColsKG[[y]][1]),"_")[[1]][3] - newX$LE_KG <- selectColsKG[[y]][1] - newX$LE_EURO <- selectColsEURO[[y]][1] - if(length(selectColsKG[[y]])>1){ - for(j in 2:length(selectColsKG[[y]])){ - newX <- rbind(newX, - data.frame(x[y,idxNonkgeur],LE_SP=strsplit(names(selectColsKG[[y]][j]),"_")[[1]][3], - LE_KG=selectColsKG[[y]][j], - LE_EURO=selectColsEURO[[y]][j])) - } - } - return(newX)}) - xRelational <- do.call(rbind,xRelationalList) -return(xRelational)} -#eflalo2 <- eflalo2relational(eflalo) - +#' Turn eflalo dataset into relational format +#' +#' Turn the column setup of eflalo into a row setup where each species catch +#' has its own row +#' +#' May take a long time for long eflalo datasets +#' +#' @param x Dataframe with eflalo data and eflalo format +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{formatEflalo}},\code{\link{readEflalo}} +#' @references EU Lot 2 project +#' @examples +#' +#' data(eflalo) +#' eflalo <- eflalo[1:20,] +#' eflaloRel <- eflalo2relational(eflalo) +#' +#' @export eflalo2relational +eflalo2relational <- function(x){ + idxkg <- grep("KG",colnames(x)) + idxeuro <- grep("EURO",colnames(x)) + selectColsKG <- apply(x[,idxkg], 1,FUN=function(y){idx <- which(y >0); + return(y[idx])}) + selectColsEURO <- apply(x[,idxeuro],1,FUN=function(y){idx <- which(y >0); + return(y[idx])}) + + + idxNonkgeur <- which(!1:length(colnames(x)) %in% kgeur(colnames(x))) + xRelationalList <- lapply(as.list(1:length(selectColsKG)),function(y){ + newX <- data.frame(x[y,idxNonkgeur]) + newX$LE_SP <- strsplit(names(selectColsKG[[y]][1]),"_")[[1]][3] + newX$LE_KG <- selectColsKG[[y]][1] + newX$LE_EURO <- selectColsEURO[[y]][1] + if(length(selectColsKG[[y]])>1){ + for(j in 2:length(selectColsKG[[y]])){ + newX <- rbind(newX, + data.frame(x[y,idxNonkgeur],LE_SP=strsplit(names(selectColsKG[[y]][j]),"_")[[1]][3], + LE_KG=selectColsKG[[y]][j], + LE_EURO=selectColsEURO[[y]][j])) + } + } + return(newX)}) + xRelational <- do.call(rbind,xRelationalList) +return(xRelational)} +#eflalo2 <- eflalo2relational(eflalo) + diff --git a/vmstools/R/eflaloHaul2Tacsat.r b/vmstools/R/eflaloHaul2Tacsat.r index ac637c7..669252d 100644 --- a/vmstools/R/eflaloHaul2Tacsat.r +++ b/vmstools/R/eflaloHaul2Tacsat.r @@ -1,31 +1,44 @@ -eflaloHaul2Tacsat <- function(x,npoints=NULL){ - - if(is.null(npoints)==T) - npoints <- 10 - if(length(npoints)!=nrow(x)) - npoints <- rep(npoints,nrow(x)) - - x$LE_SDATIM <- as.POSIXct(paste(x$LE_CDAT,x$LE_STIME),format= "%d/%m/%Y %H:%M") - x$LE_EDATIM <- as.POSIXct(paste(x$LE_CDAT,x$LE_ETIME),format= "%d/%m/%Y %H:%M") - - #- Create tacsat template - y <- data.frame(do.call(cbind,as.list(rep(NA,9)))) - colnames(y) <- c("VE_COU","VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE","FT_REF") - - taLst <- 
lapply(as.list(1:nrow(x)),function(z){ - ta <- y[rep(1,npoints[z]),] - ta$VE_COU[] <- x$VE_COU[z] - ta$VE_REF[] <- x$VE_REF[z] - ta$SI_LATI[] <- seq(x$LE_SLAT[z],x$LE_ELAT[z],length.out=npoints[z]) - ta$SI_LONG[] <- seq(x$LE_SLON[z],x$LE_ELON[z],length.out=npoints[z]) - ta$SI_DATE[] <- format(seq(x$LE_SDATIM[z],x$LE_EDATIM[z],length.out=npoints[z]),"%d/%m/%Y") - ta$SI_TIME[] <- format(seq(x$LE_SDATIM[z],x$LE_EDATIM[z],length.out=npoints[z]),"%H:%M") - ta$SI_SP[] <- c(distance(x$LE_SLON[z],x$LE_SLAT[z],x$LE_ELON[z],x$LE_ELAT[z]) / 1.852) / - c(difftime(x$LE_EDATIM[z],x$LE_SDATIM[z],units="hours")) - ta$SI_HE[] <- bearing(x$LE_SLON[z],x$LE_SLAT[z],x$LE_ELON[z],x$LE_ELAT[z]) - ta$FT_REF[] <- x$FT_REF[z] - return(ta)}) - - ta <- do.call(rbind,taLst) - return(ta)} - +#' Convert eflalo (haul by haul / log-event) to tacsat format +#' +#' Convert haul by haul / log event organised eflalo data to tacsat relational +#' style +#' +#' +#' @param x eflalo formatted dataset with haul-by-haul data (e.g. +#' LE_STIME,LE_SLON) +#' @param npoints Number of intermediate points to include (defaults to 10) +#' @return Tacsat formatted dataset is returned +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{eflalo2Relational}} +#' @export eflaloHaul2Tacsat +eflaloHaul2Tacsat <- function(x,npoints=NULL){ + + if(is.null(npoints)==T) + npoints <- 10 + if(length(npoints)!=nrow(x)) + npoints <- rep(npoints,nrow(x)) + + x$LE_SDATIM <- as.POSIXct(paste(x$LE_CDAT,x$LE_STIME),format= "%d/%m/%Y %H:%M") + x$LE_EDATIM <- as.POSIXct(paste(x$LE_CDAT,x$LE_ETIME),format= "%d/%m/%Y %H:%M") + + #- Create tacsat template + y <- data.frame(do.call(cbind,as.list(rep(NA,9)))) + colnames(y) <- c("VE_COU","VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE","FT_REF") + + taLst <- lapply(as.list(1:nrow(x)),function(z){ + ta <- y[rep(1,npoints[z]),] + ta$VE_COU[] <- x$VE_COU[z] + ta$VE_REF[] <- x$VE_REF[z] + ta$SI_LATI[] <- seq(x$LE_SLAT[z],x$LE_ELAT[z],length.out=npoints[z]) + ta$SI_LONG[] <- seq(x$LE_SLON[z],x$LE_ELON[z],length.out=npoints[z]) + ta$SI_DATE[] <- format(seq(x$LE_SDATIM[z],x$LE_EDATIM[z],length.out=npoints[z]),"%d/%m/%Y") + ta$SI_TIME[] <- format(seq(x$LE_SDATIM[z],x$LE_EDATIM[z],length.out=npoints[z]),"%H:%M") + ta$SI_SP[] <- c(distance(x$LE_SLON[z],x$LE_SLAT[z],x$LE_ELON[z],x$LE_ELAT[z]) / 1.852) / + c(difftime(x$LE_EDATIM[z],x$LE_SDATIM[z],units="hours")) + ta$SI_HE[] <- bearing(x$LE_SLON[z],x$LE_SLAT[z],x$LE_ELON[z],x$LE_ELAT[z]) + ta$FT_REF[] <- x$FT_REF[z] + return(ta)}) + + ta <- do.call(rbind,taLst) + return(ta)} + diff --git a/vmstools/R/equalDistance.r b/vmstools/R/equalDistance.r index e881377..02ccf46 100644 --- a/vmstools/R/equalDistance.r +++ b/vmstools/R/equalDistance.r @@ -1,28 +1,62 @@ -equalDistance <- function(interpolation,res=10){ - - #Calculate ditance of all interpolations at the same time - totDist <- distanceInterpolation(interpolation) - #Get dimensions of interpolations - lngInt <- lapply(interpolation,dim) - - #Warn if resolution of equal distance is too high compared to original resolution of interpolation - if(min(unlist(lngInt)[seq(1,length(totDist),2)],na.rm=TRUE) < 9*res) warnings("Number of 
intermediate points in the interpolation might be too small for the equal distance pionts chosen") - - #Get distance steps to get equal distance - eqStep <- totDist/(res-1) - - #Get x-y values of all interpolations - intidx <- matrix(unlist(lapply(interpolation,function(x){return(x[1,])})),ncol=2,byrow=TRUE) - - #Do the calculation - result <- lapply(interpolation,function(ind){ - i <- which(intidx[,1] == ind[1,1] & intidx[,2] == ind[1,2]) - idx <- apply(abs(outer( - cumsum(distance(ind[3:lngInt[[i]][1],1],ind[3:lngInt[[i]][1],2],ind[2:(lngInt[[i]][1]-1),1],ind[2:(lngInt[[i]][1]-1),2])), - seq(eqStep[i],totDist[i],eqStep[i]), - "-")), - 2,which.min)+1 - idx <- c(1,idx) - return(ind[c(1,idx+1),])}) - #Return the equal distance interpolated set in the same format as the interpolated dataset (as a list) - return(result)} +#' Interpolated points at equal distance +#' +#' Returns the interpolated dataset with only those points remaining that are +#' at equal eucledian distance from each other, with the number of points to +#' retreive remaining. +#' +#' +#' @param interpolation interpolated dataset obtained from the interpolation() +#' function +#' @param res number of points to retreive from function +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{filterTacsat}}, \code{\link{tacsat}}, +#' \code{\link{interpolateTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' +#' #Sort the VMS data +#' tacsat <- sortTacsat(tacsat) +#' tacsat <- tacsat[1:1000,] +#' +#' #Filter the Tacsat data +#' tacsat <- filterTacsat(tacsat,st=c(2,6),hd=NULL) +#' +#' #Interpolate the VMS data +#' interpolation <- interpolateTacsat(tacsat,interval=120,margin=10, +#' res=100,method="cHs",params=list(fm=0.5,distscale=20, +#' sigline=0.2,st=c(2,6)),headingAdjustment=0) +#' +#' #Get a set back with only 10 points per interpolation at equal distance +#' ed_interpolation <- equalDistance(interpolation,10) +#' +#' @export equalDistance +equalDistance <- function(interpolation,res=10){ + + #Calculate ditance of all interpolations at the same time + totDist <- distanceInterpolation(interpolation) + #Get dimensions of interpolations + lngInt <- lapply(interpolation,dim) + + #Warn if resolution of equal distance is too high compared to original resolution of interpolation + if(min(unlist(lngInt)[seq(1,length(totDist),2)],na.rm=TRUE) < 9*res) warnings("Number of intermediate points in the interpolation might be too small for the equal distance pionts chosen") + + #Get distance steps to get equal distance + eqStep <- totDist/(res-1) + + #Get x-y values of all interpolations + intidx <- matrix(unlist(lapply(interpolation,function(x){return(x[1,])})),ncol=2,byrow=TRUE) + + #Do the calculation + result <- lapply(interpolation,function(ind){ + i <- which(intidx[,1] == ind[1,1] & intidx[,2] == ind[1,2]) + idx <- apply(abs(outer( + cumsum(distance(ind[3:lngInt[[i]][1],1],ind[3:lngInt[[i]][1],2],ind[2:(lngInt[[i]][1]-1),1],ind[2:(lngInt[[i]][1]-1),2])), + seq(eqStep[i],totDist[i],eqStep[i]), + "-")), + 2,which.min)+1 + idx <- c(1,idx) + return(ind[c(1,idx+1),])}) + #Return the equal distance interpolated set in the same format as the interpolated dataset (as a list) + return(result)} diff --git 
a/vmstools/R/estimatePropFishing.r b/vmstools/R/estimatePropFishing.r index 130886e..ef14f0d 100644 --- a/vmstools/R/estimatePropFishing.r +++ b/vmstools/R/estimatePropFishing.r @@ -1,243 +1,259 @@ -estimatePropFishing <- function(tacsat,eflalo,by=c("LE_GEAR","VE_REF")){ - - if(!"SI_STATE" %in% colnames(tacsat)) - stop("Provide 'SI_STATE' column in tacsat dataset") - - - #----------------------------------------------------------------------------- - #- Add columns on dates - #----------------------------------------------------------------------------- - if(!"SI_DATIM" %in% colnames(tacsat)) - tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME,sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") - if(!"FT_DDATIM" %in% colnames(eflalo)) - eflalo$FT_DDATIM <- as.POSIXct(paste(eflalo$FT_DDAT,eflalo$FT_DTIME),format="%d/%m/%Y %H:%M",tz = "GMT") - if(!"FT_LDATIM" %in% colnames(eflalo)) - eflalo$FT_LDATIM <- as.POSIXct(paste(eflalo$FT_LDAT,eflalo$FT_LTIME),format="%d/%m/%Y %H:%M",tz = "GMT") - if(!"INTV" %in% colnames(tacsat)) - stop("Specify time interval column in tacsat (e.g. 
use intervalTacsat)") - - #- Add date notation - if("SI_DAY" %in% unique(c(by))){ - eflalo$SI_DAY <- yday(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_DAY <- yday(tacsat$SI_DATIM) - } - if("SI_YEAR" %in% unique(c(by))){ - eflalo$SI_YEAR<- year(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_YEAR<- year(tacsat$SI_DATIM) - } - if("SI_MONTH"%in% unique(c(by))){ - eflalo$SI_MONTH<- month(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_MONTH<- month(tacsat$SI_DATIM) - } - if("SI_WEEK"%in% unique(c(by))){ - eflalo$SI_WEEK<- week(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_WEEK<- week(tacsat$SI_DATIM) - } - if("SI_QUARTER"%in% unique(c(by))){ - eflalo$SI_QUARTER<- quarter(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_QUARTER<- quarter(tacsat$SI_DATIM) - } - - #- Select time levels + order these - timePos<- c("SI_DAY","SI_WEEK","SI_MONTH","SI_QUARTER","SI_YEAR") - byTime <- by[which(by %in% timePos)] - byTime <- timePos[which(timePos %in% byTime)] - - #----------------------------------------------------------------------------- - #-Add spatial location - #----------------------------------------------------------------------------- - if("LE_RECT" %in% by) - tacsat$LE_RECT <- ICESrectangle(tacsat) - if("LE_ICESAREA" %in% by){ - data(ICESareas) - tacsat$LE_AREA <- ICESarea(tacsat,ICESareas) - tacsat$LE_AREA[which(is.na(tacsat$LE_AREA)==T)] <- "OTHER" - eflonlat <- ICESrectangle2LonLat(eflalo$LE_RECT) - eflalo$LE_AREA <- ICESarea(eflonlat,ICESareas) - eflalo$LE_AREA[which(is.na(eflalo$LE_AREA)==T)] <- "OTHER" - } - - #- Select area levels + order these - areaPos<- c("LE_RECT","LE_ICESAREA") - byArea <- by[which(by %in% areaPos)] - byArea <- areaPos[which(areaPos %in% byArea)] - - #----------------------------------------------------------------------------- - #- Calculate time possible to spend per day fishing - #----------------------------------------------------------------------------- - eflalo$LE_CDATIM <- 
as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y",tz="GMT") - eflalo$INTV <- c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")) - eflalo$FT_DURDAY <- ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$FT_DDATIM),units="hours") == 0), - c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")), - ifelse(c(difftime(as.Date(eflalo$FT_DDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), - c(difftime(eflalo$LE_CDATIM+(60*60*24),eflalo$FT_DDATIM,units="mins")), - ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), - c(difftime(eflalo$FT_LDATIM,eflalo$LE_CDATIM,units="mins")), - 1440))) - - # Here there is still a problem because INTVDAY is calculated for catch days only, so you miss some effort of a whole trip - eflalo$dummy <- 1 - eflalo <- merge(eflalo,aggregate(eflalo$dummy,by=list(eflalo$FT_REF,eflalo$LE_CDATIM),FUN=sum,na.rm=T),by.x=c("FT_REF","LE_CDATIM"),by.y=c("Group.1","Group.2"),all.x=T) - colnames(eflalo)[length(colnames(eflalo))] <- "NR_FT_REF" - if("SI_DAY" %in% by){ - eflalo$INTVDAY <- eflalo$FT_DURDAY / eflalo$NR_FT_REF - } else { - eflalo$INTVDAY <- eflalo$INTV / eflalo$NR_FT_REF - } - eflalo <- eflalo[,-grep("dummy",colnames(eflalo))] - eflalo <- eflalo[,-grep("FT_DURDAY",colnames(eflalo))] - eflalo <- eflalo[,-grep("NR_FT_REF",colnames(eflalo))] - - #----------------------------------------------------------------------------- - #- Check if all colums in both tacsat and eflalo are available - #----------------------------------------------------------------------------- - if(length(which(!by %in% colnames(tacsat) | !by %in% colnames(eflalo)))>0) - stop("elements specified in 'by' are not available as columns in either tacsat or eflalo") - - #----------------------------------------------------------------------------- - #- Estimate proportion fishing? 
(estimate on a basis which proportion of being out of harbour - # is used to fish and use that proportion to rescale eflalo effort) - #----------------------------------------------------------------------------- - - subTacsat <- subset(tacsat,SI_STATE != 0 & FT_REF != 0) - subEflalo <- eflalo - - if("SI_DAY" %in% by){ - subEflalo$INTVTRIP <- subEflalo$INTV - subEflalo$INTV <- subEflalo$INTVDAY - } else { - subEflalo <- subEflalo[!duplicated(subEflalo$FT_REF),] - } - if("LE_GEAR" %in% by & !"LE_GEAR" %in% colnames(subTacsat)) - subTacsat$LE_GEAR <- subEflalo$LE_GEAR[match(subTacsat$FT_REF,subEflalo$FT_REF)] - if(!"FT_REF" %in% colnames(subTacsat)) - stop("FT_REF column missing from tacsat") - - #- Calculate effort in tacsat and eflalo on a trip basis - if(!"FT_REF" %in% by){ - by <- c(by,"FT_REF") - warning("FT_REF has been added to 'by' statement") - } - byOrig <- by - idxE <- 1:nrow(eflalo) - idxEs <- 1:nrow(subEflalo) - idxTs <- 1:nrow(subTacsat) - - #- First go: aggregate INTV on specified level and compare with each other - if(length(by)>1){ - estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=as.list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) - estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=as.list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) - } else { - estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) - estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) - } - colnames(estEffTacsat) <- c(by,"INTV") - colnames(estEffEflalo) <- c(by,"INTV") - - #- Reset to original estimPropFish - mestEff <- merge(estEffTacsat,estEffEflalo,by=c(by)) - allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) - by <- by[-grep("FT_REF",by)] - - #- Calculate estimated proportion of fishing for each eflalo ping - if(length(by)>1){ - estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), - by=as.list(mestEff[allGZero,by]), - FUN=mean,na.rm=T) - } else { - estimFish <- 
aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), - by=list(mestEff[allGZero,by]), - FUN=mean,na.rm=T) - } - estimFish$x <- exp(estimFish$x) - colnames(estimFish) <- c(by,"x") - allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) - allMean <- exp(mean(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]))) - - #- Add fishing proportion parameter to eflalo (x) - eflalo$x[idxE] <- merge(eflalo[ idxE, ],estimFish,by=by,all.x=T)$x - subEflalo$x[idxEs] <- merge(subEflalo[idxEs,],estimFish,by=by,all.x=T)$x - idxE <- which(is.na( eflalo$x)==T) - idxEs <- which(is.na(subEflalo$x)==T) - idxTs <- which(!apply(t(t(subTacsat[,by])),1,paste,collapse="_") %in% - apply(t(t(eflalo[which(is.na(eflalo$x)==F),by])),1,paste,collapse="_")) - - #- Second go: aggregate INTV on specified level and compare with each other but drop one element time at time - if(length(idxTs)>0 & length(idxEs)>0){ - if(length(by)>1){ - for(i in 1:(length(by)-1)){ - by <- rev(rev(byOrig[-grep("FT_REF",byOrig)])[-c(1:i)]) - by <- c(by,"FT_REF") - - if(!"SI_DAY" %in% by){ - subEflalo <- subEflalo[!duplicated(subEflalo$FT_REF),] - subEflalo$INTV <- subEflalo$INTVTRIP - } - - if(length(by)>1){ - estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=as.list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) - estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=as.list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) - } else { - estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) - estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) - } - colnames(estEffEflalo) <- c(by,"INTV") - colnames(estEffTacsat) <- c(by,"INTV") - - mestEff <- merge(estEffTacsat[,colnames(estEffEflalo)],estEffEflalo,by=c(by)) - allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) - - #- Reset to original estimPropFish - by <- by[-grep("FT_REF",by)] - if(length(by)>1){ - estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), - 
by=as.list(mestEff[allGZero,by]), - FUN=mean,na.rm=T) - } else { - estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), - by=list(mestEff[allGZero,by]), - FUN=mean,na.rm=T) - } - estimFish$x <- exp(estimFish$x) - colnames(estimFish) <- c(by,"x") - eflalo$x[idxE] <- merge(eflalo [idxE, -grep("x",colnames( eflalo))],estimFish,by=by,all.x=T)$x - subEflalo$x[idxEs] <- merge(subEflalo[idxEs,-grep("x",colnames(subEflalo))],estimFish,by=by,all.x=T)$x - idxE <- which(is.na( eflalo$x)==T) - idxEs <- which(is.na(subEflalo$x)==T) - idxTs <- which(!apply(t(t(subTacsat[,by])),1,paste,collapse="_") %in% - apply(t(t(eflalo[which(is.na(eflalo$x)==F),by])),1,paste,collapse="_")) - } - } - } - #- Third go: aggregate INTV on FT_REF level only - if(length(idxEs)>0 & length(idxTs)>0){ - if(!"SI_DAY" %in% by){ - subEflalo <- subEflalo[!duplicated(subEflalo$FT_REF),] - subEflalo$INTV <- subEflalo$INTVTRIP - } - estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=list(subTacsat[idxTs,"FT_REF"]),FUN=sum,na.rm=T) - estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=list(subEflalo[idxEs,"FT_REF"]),FUN=sum,na.rm=T) - colnames(estEffEflalo) <- c("FT_REF","INTV") - colnames(estEffTacsat) <- c("FT_REF","INTV") - mestEff <- merge(estEffTacsat[,colnames(estEffEflalo)],estEffEflalo,by=c("FT_REF")) - allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) - estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), - by=list(mestEff[allGZero,"FT_REF"]), - FUN=mean,na.rm=T) - estimFish$x <- exp(estimFish$x) - colnames(estimFish) <- c("FT_REF","x") - eflalo$x[idxE] <- merge(eflalo [idxE, -grep("x",colnames( eflalo))],estimFish,by="FT_REF",all.x=T)$x - subEflalo$x[idxEs] <- merge(subEflalo[idxEs,-grep("x",colnames(subEflalo))],estimFish,by="FT_REF",all.x=T)$x - idxE <- which(is.na( eflalo$x)==T) - idxEs <- which(is.na(subEflalo$x)==T) - idxTs <- which(!apply(t(t(subTacsat[,by])),1,paste,collapse="_") %in% - 
apply(t(t(eflalo[which(is.na(eflalo$x)==F),by])),1,paste,collapse="_")) - } - if(length(idxEs)>0){ - subEflalo$x[idxEs] <- allMean - eflalo$x[idxE] <- allMean - colnames(subEflalo)[length(colnames(subEflalo))] <- "PropFish" - colnames(eflalo)[ length(colnames( eflalo))] <- "PropFish" - } - colnames(eflalo)[ length(colnames( eflalo))] <- "PropFish" -return(eflalo)} +#' Estimate the proportion fishing per fishing trip +#' +#' Estimate what proportion of time a fishing vessel is fishing when outside of +#' harbour +#' +#' +#' @param tacsat tacsat dataset, including FT_REF column +#' @param eflalo eflalo dataset +#' @param by column names to calculate the estimation over (e.g. LE_GEAR to +#' estimate proportion fishing by gear or VE_REF to estimate proportion fishing +#' by vessel ID) +#' @return Returns the eflalo dataset including a new column 'propFish' which +#' provides the estimated proportion fishing for each entry +#' @author Niels T. Hintzen +#' @seealso \code{\link{raiseTacsat}} +#' @export estimatePropFishing +estimatePropFishing <- function(tacsat,eflalo,by=c("LE_GEAR","VE_REF")){ + + if(!"SI_STATE" %in% colnames(tacsat)) + stop("Provide 'SI_STATE' column in tacsat dataset") + + + #----------------------------------------------------------------------------- + #- Add columns on dates + #----------------------------------------------------------------------------- + if(!"SI_DATIM" %in% colnames(tacsat)) + tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME,sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") + if(!"FT_DDATIM" %in% colnames(eflalo)) + eflalo$FT_DDATIM <- as.POSIXct(paste(eflalo$FT_DDAT,eflalo$FT_DTIME),format="%d/%m/%Y %H:%M",tz = "GMT") + if(!"FT_LDATIM" %in% colnames(eflalo)) + eflalo$FT_LDATIM <- as.POSIXct(paste(eflalo$FT_LDAT,eflalo$FT_LTIME),format="%d/%m/%Y %H:%M",tz = "GMT") + if(!"INTV" %in% colnames(tacsat)) + stop("Specify time interval column in tacsat (e.g. 
use intervalTacsat)") + + #- Add date notation + if("SI_DAY" %in% unique(c(by))){ + eflalo$SI_DAY <- yday(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_DAY <- yday(tacsat$SI_DATIM) + } + if("SI_YEAR" %in% unique(c(by))){ + eflalo$SI_YEAR<- year(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_YEAR<- year(tacsat$SI_DATIM) + } + if("SI_MONTH"%in% unique(c(by))){ + eflalo$SI_MONTH<- month(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_MONTH<- month(tacsat$SI_DATIM) + } + if("SI_WEEK"%in% unique(c(by))){ + eflalo$SI_WEEK<- week(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_WEEK<- week(tacsat$SI_DATIM) + } + if("SI_QUARTER"%in% unique(c(by))){ + eflalo$SI_QUARTER<- quarter(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_QUARTER<- quarter(tacsat$SI_DATIM) + } + + #- Select time levels + order these + timePos<- c("SI_DAY","SI_WEEK","SI_MONTH","SI_QUARTER","SI_YEAR") + byTime <- by[which(by %in% timePos)] + byTime <- timePos[which(timePos %in% byTime)] + + #----------------------------------------------------------------------------- + #-Add spatial location + #----------------------------------------------------------------------------- + if("LE_RECT" %in% by) + tacsat$LE_RECT <- ICESrectangle(tacsat) + if("LE_ICESAREA" %in% by){ + data(ICESareas) + tacsat$LE_AREA <- ICESarea(tacsat,ICESareas) + tacsat$LE_AREA[which(is.na(tacsat$LE_AREA)==T)] <- "OTHER" + eflonlat <- ICESrectangle2LonLat(eflalo$LE_RECT) + eflalo$LE_AREA <- ICESarea(eflonlat,ICESareas) + eflalo$LE_AREA[which(is.na(eflalo$LE_AREA)==T)] <- "OTHER" + } + + #- Select area levels + order these + areaPos<- c("LE_RECT","LE_ICESAREA") + byArea <- by[which(by %in% areaPos)] + byArea <- areaPos[which(areaPos %in% byArea)] + + #----------------------------------------------------------------------------- + #- Calculate time possible to spend per day fishing + #----------------------------------------------------------------------------- + eflalo$LE_CDATIM <- 
as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y",tz="GMT") + eflalo$INTV <- c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")) + eflalo$FT_DURDAY <- ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$FT_DDATIM),units="hours") == 0), + c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")), + ifelse(c(difftime(as.Date(eflalo$FT_DDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), + c(difftime(eflalo$LE_CDATIM+(60*60*24),eflalo$FT_DDATIM,units="mins")), + ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), + c(difftime(eflalo$FT_LDATIM,eflalo$LE_CDATIM,units="mins")), + 1440))) + + # Here there is still a problem because INTVDAY is calculated for catch days only, so you miss some effort of a whole trip + eflalo$dummy <- 1 + eflalo <- merge(eflalo,aggregate(eflalo$dummy,by=list(eflalo$FT_REF,eflalo$LE_CDATIM),FUN=sum,na.rm=T),by.x=c("FT_REF","LE_CDATIM"),by.y=c("Group.1","Group.2"),all.x=T) + colnames(eflalo)[length(colnames(eflalo))] <- "NR_FT_REF" + if("SI_DAY" %in% by){ + eflalo$INTVDAY <- eflalo$FT_DURDAY / eflalo$NR_FT_REF + } else { + eflalo$INTVDAY <- eflalo$INTV / eflalo$NR_FT_REF + } + eflalo <- eflalo[,-grep("dummy",colnames(eflalo))] + eflalo <- eflalo[,-grep("FT_DURDAY",colnames(eflalo))] + eflalo <- eflalo[,-grep("NR_FT_REF",colnames(eflalo))] + + #----------------------------------------------------------------------------- + #- Check if all colums in both tacsat and eflalo are available + #----------------------------------------------------------------------------- + if(length(which(!by %in% colnames(tacsat) | !by %in% colnames(eflalo)))>0) + stop("elements specified in 'by' are not available as columns in either tacsat or eflalo") + + #----------------------------------------------------------------------------- + #- Estimate proportion fishing? 
(estimate on a basis which proportion of being out of harbour + # is used to fish and use that proportion to rescale eflalo effort) + #----------------------------------------------------------------------------- + + subTacsat <- subset(tacsat,SI_STATE != 0 & FT_REF != 0) + subEflalo <- eflalo + + if("SI_DAY" %in% by){ + subEflalo$INTVTRIP <- subEflalo$INTV + subEflalo$INTV <- subEflalo$INTVDAY + } else { + subEflalo <- subEflalo[!duplicated(subEflalo$FT_REF),] + } + if("LE_GEAR" %in% by & !"LE_GEAR" %in% colnames(subTacsat)) + subTacsat$LE_GEAR <- subEflalo$LE_GEAR[match(subTacsat$FT_REF,subEflalo$FT_REF)] + if(!"FT_REF" %in% colnames(subTacsat)) + stop("FT_REF column missing from tacsat") + + #- Calculate effort in tacsat and eflalo on a trip basis + if(!"FT_REF" %in% by){ + by <- c(by,"FT_REF") + warning("FT_REF has been added to 'by' statement") + } + byOrig <- by + idxE <- 1:nrow(eflalo) + idxEs <- 1:nrow(subEflalo) + idxTs <- 1:nrow(subTacsat) + + #- First go: aggregate INTV on specified level and compare with each other + if(length(by)>1){ + estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=as.list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) + estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=as.list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) + } else { + estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) + estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) + } + colnames(estEffTacsat) <- c(by,"INTV") + colnames(estEffEflalo) <- c(by,"INTV") + + #- Reset to original estimPropFish + mestEff <- merge(estEffTacsat,estEffEflalo,by=c(by)) + allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) + by <- by[-grep("FT_REF",by)] + + #- Calculate estimated proportion of fishing for each eflalo ping + if(length(by)>1){ + estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), + by=as.list(mestEff[allGZero,by]), + FUN=mean,na.rm=T) + } else { + estimFish <- 
aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), + by=list(mestEff[allGZero,by]), + FUN=mean,na.rm=T) + } + estimFish$x <- exp(estimFish$x) + colnames(estimFish) <- c(by,"x") + allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) + allMean <- exp(mean(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]))) + + #- Add fishing proportion parameter to eflalo (x) + eflalo$x[idxE] <- merge(eflalo[ idxE, ],estimFish,by=by,all.x=T)$x + subEflalo$x[idxEs] <- merge(subEflalo[idxEs,],estimFish,by=by,all.x=T)$x + idxE <- which(is.na( eflalo$x)==T) + idxEs <- which(is.na(subEflalo$x)==T) + idxTs <- which(!apply(t(t(subTacsat[,by])),1,paste,collapse="_") %in% + apply(t(t(eflalo[which(is.na(eflalo$x)==F),by])),1,paste,collapse="_")) + + #- Second go: aggregate INTV on specified level and compare with each other but drop one element time at time + if(length(idxTs)>0 & length(idxEs)>0){ + if(length(by)>1){ + for(i in 1:(length(by)-1)){ + by <- rev(rev(byOrig[-grep("FT_REF",byOrig)])[-c(1:i)]) + by <- c(by,"FT_REF") + + if(!"SI_DAY" %in% by){ + subEflalo <- subEflalo[!duplicated(subEflalo$FT_REF),] + subEflalo$INTV <- subEflalo$INTVTRIP + } + + if(length(by)>1){ + estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=as.list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) + estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=as.list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) + } else { + estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) + estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) + } + colnames(estEffEflalo) <- c(by,"INTV") + colnames(estEffTacsat) <- c(by,"INTV") + + mestEff <- merge(estEffTacsat[,colnames(estEffEflalo)],estEffEflalo,by=c(by)) + allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) + + #- Reset to original estimPropFish + by <- by[-grep("FT_REF",by)] + if(length(by)>1){ + estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), + 
by=as.list(mestEff[allGZero,by]), + FUN=mean,na.rm=T) + } else { + estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), + by=list(mestEff[allGZero,by]), + FUN=mean,na.rm=T) + } + estimFish$x <- exp(estimFish$x) + colnames(estimFish) <- c(by,"x") + eflalo$x[idxE] <- merge(eflalo [idxE, -grep("x",colnames( eflalo))],estimFish,by=by,all.x=T)$x + subEflalo$x[idxEs] <- merge(subEflalo[idxEs,-grep("x",colnames(subEflalo))],estimFish,by=by,all.x=T)$x + idxE <- which(is.na( eflalo$x)==T) + idxEs <- which(is.na(subEflalo$x)==T) + idxTs <- which(!apply(t(t(subTacsat[,by])),1,paste,collapse="_") %in% + apply(t(t(eflalo[which(is.na(eflalo$x)==F),by])),1,paste,collapse="_")) + } + } + } + #- Third go: aggregate INTV on FT_REF level only + if(length(idxEs)>0 & length(idxTs)>0){ + if(!"SI_DAY" %in% by){ + subEflalo <- subEflalo[!duplicated(subEflalo$FT_REF),] + subEflalo$INTV <- subEflalo$INTVTRIP + } + estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=list(subTacsat[idxTs,"FT_REF"]),FUN=sum,na.rm=T) + estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=list(subEflalo[idxEs,"FT_REF"]),FUN=sum,na.rm=T) + colnames(estEffEflalo) <- c("FT_REF","INTV") + colnames(estEffTacsat) <- c("FT_REF","INTV") + mestEff <- merge(estEffTacsat[,colnames(estEffEflalo)],estEffEflalo,by=c("FT_REF")) + allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) + estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), + by=list(mestEff[allGZero,"FT_REF"]), + FUN=mean,na.rm=T) + estimFish$x <- exp(estimFish$x) + colnames(estimFish) <- c("FT_REF","x") + eflalo$x[idxE] <- merge(eflalo [idxE, -grep("x",colnames( eflalo))],estimFish,by="FT_REF",all.x=T)$x + subEflalo$x[idxEs] <- merge(subEflalo[idxEs,-grep("x",colnames(subEflalo))],estimFish,by="FT_REF",all.x=T)$x + idxE <- which(is.na( eflalo$x)==T) + idxEs <- which(is.na(subEflalo$x)==T) + idxTs <- which(!apply(t(t(subTacsat[,by])),1,paste,collapse="_") %in% + 
apply(t(t(eflalo[which(is.na(eflalo$x)==F),by])),1,paste,collapse="_")) + } + if(length(idxEs)>0){ + subEflalo$x[idxEs] <- allMean + eflalo$x[idxE] <- allMean + colnames(subEflalo)[length(colnames(subEflalo))] <- "PropFish" + colnames(eflalo)[ length(colnames( eflalo))] <- "PropFish" + } + colnames(eflalo)[ length(colnames( eflalo))] <- "PropFish" +return(eflalo)} diff --git a/vmstools/R/extractTableMainSpecies.r b/vmstools/R/extractTableMainSpecies.r index 9297ca2..cd78048 100644 --- a/vmstools/R/extractTableMainSpecies.r +++ b/vmstools/R/extractTableMainSpecies.r @@ -1,72 +1,149 @@ -################################################################ -# STEP 1 OF THE MULTIVARIATE CLASSIFICATION : # -# SELECT MAIN SPECIES OF THE DATASET # -# AND # -# EXTRACT A NEW DATASET WITH ONLY THE MAIN SPECIES # -################################################################ - - -extractTableMainSpecies = function(dat,namesMainSpeciesHAC,paramTotal=95,paramLogevent=100){ - - print("######## STEP 1 COMBINATION OF MAIN SPECIES FROM THE THREE EXPLORATORY METHODS ########") - t1 <- Sys.time() - - # TOTALE - - p=ncol(dat) # Number of species +1 - print("Calculating proportions...") - propdat=transformation_proportion(dat[,2:p]) - namesSpecies=colnames(propdat) - - # Total quantity caught species by species - sumcol=rep(as.numeric(NA),p-1) #numeric() - for(i in 2:p) sumcol[i-1]=sum(dat[,i]) - names(sumcol)=namesSpecies - - # Percent of each species in the total catch - propSp=sumcol/sum(sumcol,na.rm=TRUE)*100 - # Columns number of each species by decreasing order of capture - numSp=order(propSp,decreasing=TRUE) - # Percent of each species in the total catch by decreasing order - propSp=cumsum(propSp[order(propSp,decreasing=TRUE)]) - - Store(objects()) - gc(reset=TRUE) - - # We are taking all species until having at least param1% of total catch - if (is.null(paramTotal) | !is.numeric(paramTotal)) stop("param1 must be numeric between 0 and 100") - threshold=paramTotal - 
pourcent=which(propSp<=threshold) - # We are taking the name of selected species - selSpecies=numSp[1:(length(pourcent)+1)] - namesSelSpeciesTotal=namesSpecies[selSpecies] - - - - # LOGEVENT - - if (is.null(paramLogevent) | !is.numeric(paramLogevent)) stop("paramLogevent must be numeric between 0 and 100") - - threshold=paramLogevent - # Selection of species making up over param1% of logevent's captures - namesSelSpeciesLogevent=character() - for (i in namesSpecies) { - if (!is.na(any(propdat[,i]>=threshold)) && any(propdat[,i]>=threshold)) namesSelSpeciesLogevent=c(namesSelSpeciesLogevent,i) - } - - # Merge with explospecies - listSpeciesAll <- sort(unique(c(namesMainSpeciesHAC,namesSelSpeciesTotal,namesSelSpeciesLogevent))) - listSpeciesAll <- listSpeciesAll[!listSpeciesAll=="MZZ"] - - # We are building the table with main species and aggregated other species - datSpecies=building_tab_pca(propdat,listSpeciesAll) - rownames(datSpecies)=dat[,1] - - - print(" --- end of step 1 ---") - print(Sys.time()-t1) - - return(datSpecies) - -} - +################################################################ +# STEP 1 OF THE MULTIVARIATE CLASSIFICATION : # +# SELECT MAIN SPECIES OF THE DATASET # +# AND # +# EXTRACT A NEW DATASET WITH ONLY THE MAIN SPECIES # +################################################################ + + + + +#' Finding Metiers from a reduced EFLALO dataset, step 1 : selecting the +#' species considered as important for the analysis of target assemblages, by +#' crossing three different approaches. +#' +#' This function represents the first step in the multivariate analysis of +#' logbooks data for identifying metiers. +#' +#' A typical logbook dataset will contain a large number of species recorded, +#' but only a limited number of these could potentially be considered as target +#' species. 
This function builds further on the exploration conducted with the +#' function selectMainSpecies() by combining three different approaches : - HAC +#' (Hierarchical Ascending Classification) based on Euclidian distances between +#' species with Ward aggregating criteria; - Total, where species are ranked +#' based on their proportion in the total catches, and those cumulating to a +#' given percentage are retained - Logevent, where species are selected if they +#' represent at least a given percentage of at least one logevent (one line) +#' +#' All the three methods work on catch data transformed in percentage of +#' species by logevent (line), in order to remove the effect of large hauls +#' compared to small hauls. The HAC method is not run in this function, but in +#' the selectMainSpecies() function, which outputs are directly used here. In +#' addition, the relevant thresholds for the two other methods Total and +#' Logevent must be selected. +#' +#' +#' @param dat a data frame reduced from an eflalo format. It should contain +#' only the LE_ID (Logevent ID) variable as well as all species names in +#' columns, with raw catch data. It is necessary to sort out potential +#' error-prone lines (such as lines with only 0) prior to the analysis, and to +#' replace NA values by 0. +#' @param namesMainSpeciesHAC character. The names of species retained by the +#' HAC method selectMainSpecies() function. If omitted, only the two other +#' methods will be used for selecting species. +#' @param paramTotal numeric. The percentage threshold for the Total method. +#' All species ranked in terms of percentage in total catches and cumulating up +#' to this percentage value will be retained. Default value is 95\%. +#' @param paramLogevent numeric. The percentage threshold for the Logevent +#' method. All species representing at least this threshold of at least one +#' logevent (one line) will be retained. 
Default value is 100\% (logevents +#' catching one and only one species). +#' @return The function returns a numerical matrix with Logevents as lines and +#' species as columns, with percentage values (between 0 and 100) of each +#' species in the logevent catches. Logevent ID (LE_ID) are returned as row +#' names. +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @seealso \code{\link{selectMainSpecies}} +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @examples +#' +#' +#' data(eflalo) +#' +#' eflalo <- formatEflalo(eflalo) +#' +#' eflalo <- eflalo[eflalo$LE_GEAR=="OTB",] +#' # note that output plots will be sent to getwd() +#' analysisName <- "metier_analysis_OTB" +#' +#' dat <- eflalo[,c("LE_ID",grep("EURO",colnames(eflalo),value=TRUE))] +#' names(dat)[-1] <- unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) +#' +#' explo <- selectMainSpecies(dat, analysisName, RunHAC=TRUE, DiagFlag=FALSE) +#' #=> send the LE_ID and LE_EURO_SP columns only +#' +#' Step1 <- extractTableMainSpecies(dat, explo$namesMainSpeciesHAC, +#' paramTotal=95, paramLogevent=100) +#' #=> send the LE_ID and LE_EURO_SP columns only +#' +#' +#' @export extractTableMainSpecies +extractTableMainSpecies = function(dat,namesMainSpeciesHAC,paramTotal=95,paramLogevent=100){ + + print("######## STEP 1 COMBINATION OF MAIN SPECIES FROM THE THREE EXPLORATORY METHODS ########") + t1 <- Sys.time() + + # TOTALE + + p=ncol(dat) # Number of species +1 + print("Calculating proportions...") + propdat=transformation_proportion(dat[,2:p]) + namesSpecies=colnames(propdat) + + # Total quantity caught species by species + 
sumcol=rep(as.numeric(NA),p-1) #numeric() + for(i in 2:p) sumcol[i-1]=sum(dat[,i]) + names(sumcol)=namesSpecies + + # Percent of each species in the total catch + propSp=sumcol/sum(sumcol,na.rm=TRUE)*100 + # Columns number of each species by decreasing order of capture + numSp=order(propSp,decreasing=TRUE) + # Percent of each species in the total catch by decreasing order + propSp=cumsum(propSp[order(propSp,decreasing=TRUE)]) + + Store(objects()) + gc(reset=TRUE) + + # We are taking all species until having at least param1% of total catch + if (is.null(paramTotal) | !is.numeric(paramTotal)) stop("param1 must be numeric between 0 and 100") + threshold=paramTotal + pourcent=which(propSp<=threshold) + # We are taking the name of selected species + selSpecies=numSp[1:(length(pourcent)+1)] + namesSelSpeciesTotal=namesSpecies[selSpecies] + + + + # LOGEVENT + + if (is.null(paramLogevent) | !is.numeric(paramLogevent)) stop("paramLogevent must be numeric between 0 and 100") + + threshold=paramLogevent + # Selection of species making up over param1% of logevent's captures + namesSelSpeciesLogevent=character() + for (i in namesSpecies) { + if (!is.na(any(propdat[,i]>=threshold)) && any(propdat[,i]>=threshold)) namesSelSpeciesLogevent=c(namesSelSpeciesLogevent,i) + } + + # Merge with explospecies + listSpeciesAll <- sort(unique(c(namesMainSpeciesHAC,namesSelSpeciesTotal,namesSelSpeciesLogevent))) + listSpeciesAll <- listSpeciesAll[!listSpeciesAll=="MZZ"] + + # We are building the table with main species and aggregated other species + datSpecies=building_tab_pca(propdat,listSpeciesAll) + rownames(datSpecies)=dat[,1] + + + print(" --- end of step 1 ---") + print(Sys.time()-t1) + + return(datSpecies) + +} + diff --git a/vmstools/R/filterTacsat.r b/vmstools/R/filterTacsat.r index 3088583..20f4f3c 100644 --- a/vmstools/R/filterTacsat.r +++ b/vmstools/R/filterTacsat.r @@ -1,37 +1,70 @@ -`filterTacsat` <- -function(tacsat - ,st=c(2,6) #Speed threshold points (two values), NULL means 
use all points - ,hd=NULL #Heading threshold points (two values), NULL means use all points - ,remDup=TRUE #Specify if you want to remove duplicated VMS records (these should not occur in the first place) - ){ - VMS <- tacsat -if(!"SI_DATIM" %in% colnames(VMS)) VMS$SI_DATIM <- as.POSIXct(paste(VMS$SI_DATE, VMS$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - #Remove duplicate records -if(is.null(remDup)==FALSE){ - uniqueVMS <- which(duplicated(VMS[,c("VE_REF","SI_DATIM")])==FALSE) - VMS. <- VMS[uniqueVMS,] - if(dim(VMS.)[1] != dim(VMS)[1]) warning("duplicate records have been removed") -} else { - VMS. <- VMS - } - -if(is.null(hd)==FALSE){ - warning("It is assumed that VMS data is sorted by vessel and date") - #Calculate the difference in heading between succeeding VMS datapoints - diffHeading <- abs(VMS.$SI_HE[2:dim(VMS.)[1]] - VMS.$SI_HE[1:(dim(VMS.)[1]-1)]) - #If there is a change to a new vessel in the data, put diffHeading to NA - diffHeading[unlist(lapply(as.list(unique(VMS.$VE_REF)),function(x){return(which(VMS.$VE_REF == x)[1])}))[-1]-1] <- NA - VMS.$diffHeading <- 0 - #Store the difference in heading - VMS.$diffHeading[1:(dim(VMS.)[1]-1)] <- diffHeading - #Subset based on the absolute difference in heading - VMS. <- subset(VMS.,diffHeading %in% seq(hd[1],hd[2],1)) -} - -if(is.null(st)==FALSE){ - #Subset based on the speed ranges - if(length(st)>2) stop("More than two speed values selected") - VMS. <- VMS.[which(VMS.$SI_SP >= st[1] & VMS.$SI_SP <= st[2]),] -} -return(VMS.[,-grep("SI_DATIM",colnames(VMS.))])} - +#' Filter out duplicated or out-of-range Tacsat datapoints +#' +#' Filter out duplicated Tacsat datapoints and possibly remove Tacsat +#' datapoints that are outside a certain speed range or heading range. A new +#' Tacsat set, without the out-filtered records is returned. +#' +#' hd is computed as the change in heading from the previous Tacsat point (if +#' ship remains the same). 
Hence, hd does not equal the heading as given in the +#' Tacsat dataset +#' +#' @param tacsat Original VMS data.frame with columns speed, heading, ship and +#' date +#' @param st Speed thresholds in between records are kept. Begin and end +#' threshold or NULL +#' @param hd Heading thresholds in between records are kept. Begin and end +#' threshold or NULL +#' @param remDup Remove duplicates: True or False +#' @author Niels T. Hintzen +#' @seealso \code{\link{sortTacsat}} +#' @references EU lot 2 project, see also: Mills et al. 2006 +#' @examples +#' +#' data(tacsat) +#' +#' #Sort the VMS data +#' tacsat <- sortTacsat(tacsat) +#' tacsat <- tacsat[1:1000,] +#' +#' #Filter the VMS data +#' #A warning is given that duplicated VMS points have been removed +#' tacsat <- filterTacsat(tacsat,c(2,6),hd=NULL,remDup=TRUE) +#' +#' @export filterTacsat +`filterTacsat` <- +function(tacsat + ,st=c(2,6) #Speed threshold points (two values), NULL means use all points + ,hd=NULL #Heading threshold points (two values), NULL means use all points + ,remDup=TRUE #Specify if you want to remove duplicated VMS records (these should not occur in the first place) + ){ + VMS <- tacsat +if(!"SI_DATIM" %in% colnames(VMS)) VMS$SI_DATIM <- as.POSIXct(paste(VMS$SI_DATE, VMS$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + #Remove duplicate records +if(is.null(remDup)==FALSE){ + uniqueVMS <- which(duplicated(VMS[,c("VE_REF","SI_DATIM")])==FALSE) + VMS. <- VMS[uniqueVMS,] + if(dim(VMS.)[1] != dim(VMS)[1]) warning("duplicate records have been removed") +} else { + VMS. 
<- VMS + } + +if(is.null(hd)==FALSE){ + warning("It is assumed that VMS data is sorted by vessel and date") + #Calculate the difference in heading between succeeding VMS datapoints + diffHeading <- abs(VMS.$SI_HE[2:dim(VMS.)[1]] - VMS.$SI_HE[1:(dim(VMS.)[1]-1)]) + #If there is a change to a new vessel in the data, put diffHeading to NA + diffHeading[unlist(lapply(as.list(unique(VMS.$VE_REF)),function(x){return(which(VMS.$VE_REF == x)[1])}))[-1]-1] <- NA + VMS.$diffHeading <- 0 + #Store the difference in heading + VMS.$diffHeading[1:(dim(VMS.)[1]-1)] <- diffHeading + #Subset based on the absolute difference in heading + VMS. <- subset(VMS.,diffHeading %in% seq(hd[1],hd[2],1)) +} + +if(is.null(st)==FALSE){ + #Subset based on the speed ranges + if(length(st)>2) stop("More than two speed values selected") + VMS. <- VMS.[which(VMS.$SI_SP >= st[1] & VMS.$SI_SP <= st[2]),] +} +return(VMS.[,-grep("SI_DATIM",colnames(VMS.))])} + diff --git a/vmstools/R/findArea.r b/vmstools/R/findArea.r index 2c91cee..f66ba23 100644 --- a/vmstools/R/findArea.r +++ b/vmstools/R/findArea.r @@ -1,32 +1,70 @@ -findArea <- function(grid,threshold=100,diagonal=TRUE){ - - if(class(grid) != "SpatialGridDataFrame") stop(paste("Function not defined for class",class(grid))) - if(!"value" %in% colnames(grid@data)) stop("No 'value' column available in data slot") - grid <- surface(grid) - coords <- coordinates(grid) - - resx <- grid@grid@cellsize[1] - resy <- grid@grid@cellsize[2] - - storeVals <- data.frame(minval=rep(0,nrow(coords)),idxs=rep(NA,nrow(coords))) - for(i in 1:nrow(coords)){ - totSurf <- 0 - pts <- i - while(totSurf <= threshold){ - dists <- do.call(pmin, lapply(as.list(pts), function(x) { - distance(coords[x, 1], coords[x, 2], coords[-pts, 1], coords[-pts,2])})) - if(diagonal==TRUE) idx <- (1:nrow(coords))[-pts][which(dists <= do.call(max, lapply(as.list(pts), function(x) { - max(distance(coords[x, 1], coords[x, 2], coords[x, 1]-resx, coords[x,2]-resy), - distance(coords[x, 1], coords[x, 
2], coords[x, 1]+resx, coords[x,2]+resy))})))] - if(diagonal==FALSE) idx <- (1:nrow(coords))[-pts][which(dists <= do.call(max, lapply(as.list(pts), function(x) { - max(distance(coords[x, 1], coords[x, 2], coords[x, 1]-resx, coords[x,2]), - distance(coords[x, 1], coords[x, 2], coords[x, 1]+resx, coords[x,2]), - distance(coords[x, 1], coords[x, 2], coords[x, 1], coords[x,2]-resy), - distance(coords[x, 1], coords[x, 2], coords[x, 1], coords[x,2]+resy))})))] - pts <- unique(c(pts,idx[which.min(grid@data$value[idx])])) - totSurf <- sum(grid@data$cellArea[pts]) - } - storeVals$minval[i] <- grid@data$value[pts] - storeVals$idxs[i] <- paste(pts,collapse= " ") - } - return(storeVals)} +#' Find area surface smaller than threshold +#' +#' Find gridcells with maximum surface equal or smaller to 'threshold' and +#' return their total value and gridcells involved +#' +#' Diagonal means if area may include x+1,y+1 steps versus only x+1 or y+1 +#' steps +#' +#' @param grid A SpatialGridDataFrame +#' @param threshold Maximum value of surfaces added up +#' @param diagonal Allow diagonal steps in gridcell selection +#' @return data.frame with minimum surface area and gridcells selected +#' @author Niels T. Hintzen +#' @seealso \code{\link{surface}} +#' @examples +#' +#' xrange <- c(0,4) +#' yrange <- c(52,56) +#' resx <- 0.25 +#' resy <- 0.125 +#' +#' #-create grid and assign value column +#' grd <- createGrid(xrange,yrange,resx,resy,type="SpatialGridDataFrame",exactBorder=TRUE) +#' grd@data$value <- runif(nrow(coordinates(grd)),5,10) +#' +#' #- find gridcells with maximum surface equal or smaller to 'threshold' and return their +#' # total value and gridcells involved (diagonal means if area may include x+1,y+1 steps +#' # versus only x+1 or y+1 steps). Note, the larger the threshold, the longer the function +#' # will run! 
+#' res <- findArea(grd,threshold=1000,diagonal=TRUE) +#' +#' #- Plot the result +#' plot(grd,type="p",pch=19,cex=0.5) +#' map.axes() +#' selec <- which.min(res$minval) +#' points(coordinates(grd)[as.numeric(unlist(strsplit(res[selec,"idxs"]," "))),],col=2,lwd=3) +#' +#' @export findArea +findArea <- function(grid,threshold=100,diagonal=TRUE){ + + if(class(grid) != "SpatialGridDataFrame") stop(paste("Function not defined for class",class(grid))) + if(!"value" %in% colnames(grid@data)) stop("No 'value' column available in data slot") + grid <- surface(grid) + coords <- coordinates(grid) + + resx <- grid@grid@cellsize[1] + resy <- grid@grid@cellsize[2] + + storeVals <- data.frame(minval=rep(0,nrow(coords)),idxs=rep(NA,nrow(coords))) + for(i in 1:nrow(coords)){ + totSurf <- 0 + pts <- i + while(totSurf <= threshold){ + dists <- do.call(pmin, lapply(as.list(pts), function(x) { + distance(coords[x, 1], coords[x, 2], coords[-pts, 1], coords[-pts,2])})) + if(diagonal==TRUE) idx <- (1:nrow(coords))[-pts][which(dists <= do.call(max, lapply(as.list(pts), function(x) { + max(distance(coords[x, 1], coords[x, 2], coords[x, 1]-resx, coords[x,2]-resy), + distance(coords[x, 1], coords[x, 2], coords[x, 1]+resx, coords[x,2]+resy))})))] + if(diagonal==FALSE) idx <- (1:nrow(coords))[-pts][which(dists <= do.call(max, lapply(as.list(pts), function(x) { + max(distance(coords[x, 1], coords[x, 2], coords[x, 1]-resx, coords[x,2]), + distance(coords[x, 1], coords[x, 2], coords[x, 1]+resx, coords[x,2]), + distance(coords[x, 1], coords[x, 2], coords[x, 1], coords[x,2]-resy), + distance(coords[x, 1], coords[x, 2], coords[x, 1], coords[x,2]+resy))})))] + pts <- unique(c(pts,idx[which.min(grid@data$value[idx])])) + totSurf <- sum(grid@data$cellArea[pts]) + } + storeVals$minval[i] <- grid@data$value[pts] + storeVals$idxs[i] <- paste(pts,collapse= " ") + } + return(storeVals)} diff --git a/vmstools/R/findEndTacsat.r b/vmstools/R/findEndTacsat.r index 08a1d94..709fda0 100644 --- 
a/vmstools/R/findEndTacsat.r +++ b/vmstools/R/findEndTacsat.r @@ -1,33 +1,75 @@ -`findEndTacsat` <- -function(SI_DATIM - ,startVMS #Starting point of VMS - ,interval #Specify in minutes, NULL means use all points - ,margin #Specify the margin in minutes it might deviate from the interval time, in minutes - ){ - - #Calculate the difference in time between the starting VMS point and its succeeding points - diffTime <- difftime(SI_DATIM[(startVMS+1):length(SI_DATIM)],SI_DATIM[startVMS],units=c("mins")) - if(length(which(diffTime >= (interval-margin) & diffTime <= (interval+margin)))==0){ - warning("No succeeding point found, no interpolation possible") - endVMS <- NA - endDataSet <- 3 - #Check if end of dataset has been reached - ifelse(all((diffTime < (interval-margin))==TRUE),endDataSet <- 1,endDataSet <- 0) - } else { - res <- which(diffTime >= (interval-margin) & diffTime <= (interval+margin)) - if(length(res)>1){ - res2 <- which.min(abs(interval-an(diffTime[res]))) - endVMS <- startVMS + res[res2] - endDataSet <- 0 - } else { - endVMS <- startVMS + res - endDataSet<- 0 - } - } - #Build-in check - if(is.na(endVMS)==FALSE){ - if(!an(difftime(SI_DATIM[endVMS],SI_DATIM[startVMS],units=c("mins"))) %in% seq((interval-margin),(interval+margin),1)) stop("found endVMS point not within interval range") - endVMS <- (endVMS - startVMS) - } -return(c(endVMS,endDataSet))} - +#' Finding the succeeding Tacsat datapoint based on an interval with a +#' specified margin +#' +#' To create an interpolation, two succeeding Tacsat datapoints are needed. +#' This function finds the succeeding Tacsat point and tests if the point is +#' within the specified time interval and margins. As well, if no succeeding +#' datapoint can be found, this information is returned +#' +#' Interval: In most Tacsat datasets the succeeding datapoint can be found 1 or +#' 2 hours apart. This interval time should be specified here. Interval can +#' also be specified as e.g. 
15 minutes if the Tacsat / GPS dataset allows +#' this. Margin: Hardly ever, the interval time is precise. To allow some +#' deviation from the 1 or 2 hour interval the margin can be adjusted. +#' +#' The result returned consists of 2 values. The first value is the index of +#' the Tacsat set specified of the succeeding datapoint. The second value +#' indicates if the dataset has ended. If 1st: NA and 2nd 0 then no succeeding +#' Tacsat point could be found in the specified interval. If 1st: NA and 2nd 1 +#' then no succeeding Tacsat point could be found and end of dataset for a +#' specific vessel has been reached. If 1st: NA and 2nd 2 then no succeeding +#' Tacsat point could be found and end of complete dataset has been reached. If +#' 1st: value then 2nd will be 0, succeeding Tacsat point is found and is +#' specified in 1st value. +#' +#' @param tacsat The Tacsat dataset +#' @param startTacsat Index of Tacsat dataset of startpoint of interpolation +#' @param interval Time in minutes between the succeeding datapoints +#' @param margin Deviation from specified interval to find succeeding +#' datapoints +#' @note This function is called inside interpolateTacsat() +#' @author Niels T. Hintzen +#' @seealso \code{\link{filterTacsat}}, \code{\link{interpolateTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' startTacsat <- 2 +#' #result: 3 0 Succeeding point = tacsat[3,] +#' # and end dataset has not been reached yet. 
+#' findEndTacsat(tacsat,startTacsat,interval=120,margin=10) +#' +#' @export findEndTacsat +`findEndTacsat` <- +function(SI_DATIM + ,startVMS #Starting point of VMS + ,interval #Specify in minutes, NULL means use all points + ,margin #Specify the margin in minutes it might deviate from the interval time, in minutes + ){ + + #Calculate the difference in time between the starting VMS point and its succeeding points + diffTime <- difftime(SI_DATIM[(startVMS+1):length(SI_DATIM)],SI_DATIM[startVMS],units=c("mins")) + if(length(which(diffTime >= (interval-margin) & diffTime <= (interval+margin)))==0){ + warning("No succeeding point found, no interpolation possible") + endVMS <- NA + endDataSet <- 3 + #Check if end of dataset has been reached + ifelse(all((diffTime < (interval-margin))==TRUE),endDataSet <- 1,endDataSet <- 0) + } else { + res <- which(diffTime >= (interval-margin) & diffTime <= (interval+margin)) + if(length(res)>1){ + res2 <- which.min(abs(interval-an(diffTime[res]))) + endVMS <- startVMS + res[res2] + endDataSet <- 0 + } else { + endVMS <- startVMS + res + endDataSet<- 0 + } + } + #Build-in check + if(is.na(endVMS)==FALSE){ + if(!an(difftime(SI_DATIM[endVMS],SI_DATIM[startVMS],units=c("mins"))) %in% seq((interval-margin),(interval+margin),1)) stop("found endVMS point not within interval range") + endVMS <- (endVMS - startVMS) + } +return(c(endVMS,endDataSet))} + diff --git a/vmstools/R/formatEflalo.r b/vmstools/R/formatEflalo.r index f9a3a74..1949039 100644 --- a/vmstools/R/formatEflalo.r +++ b/vmstools/R/formatEflalo.r @@ -1,37 +1,54 @@ -formatEflalo <- function(x){ - x$VE_REF <- ac(x$VE_REF) - x$VE_FLT <- ac(x$VE_FLT) - x$VE_COU <- ac(x$VE_COU) - x$VE_LEN <- an(ac(x$VE_LEN)) - x$VE_KW <- an(ac(x$VE_KW)) - if("VE_TON" %in% colnames(x)) x$VE_TON <- an(ac(x$VE_TON)) - x$FT_REF <- ac(x$FT_REF) - x$FT_DCOU <- ac(x$FT_DCOU) - x$FT_DHAR <- ac(x$FT_DHAR) - x$FT_DDAT <- ac(x$FT_DDAT) - x$FT_DTIME <- ac(x$FT_DTIME) - x$FT_LCOU <- ac(x$FT_LCOU) - x$FT_LHAR <- 
ac(x$FT_LHAR) - x$FT_LDAT <- ac(x$FT_LDAT) - x$FT_LTIME <- ac(x$FT_LTIME) - x$LE_ID <- ac(x$LE_ID) - x$LE_CDAT <- ac(x$LE_CDAT) - if("LE_UNIT" %in% colnames(x)) x$LE_UNIT <- ac(x$LE_UNIT) - if("LE_STIME" %in% colnames(x)) x$LE_STIME <- ac(x$LE_STIME) - if("LE_ETIME" %in% colnames(x)) x$LE_ETIME <- ac(x$LE_ETIME) - if("LE_SLAT" %in% colnames(x)) x$LE_SLAT <- an(ac(x$LE_SLAT)) - if("LE_SLON" %in% colnames(x)) x$LE_SLON <- an(ac(x$LE_SLON)) - if("LE_ELAT" %in% colnames(x)) x$LE_ELAT <- an(ac(x$LE_ELAT)) - if("LE_ELON" %in% colnames(x)) x$LE_ELON <- an(ac(x$LE_ELON)) - x$LE_GEAR <- ac(x$LE_GEAR) - x$LE_MSZ <- an(ac(x$LE_MSZ)) - x$LE_RECT <- ac(x$LE_RECT) - x$LE_DIV <- ac(x$LE_DIV) - if("LE_MET" %in% colnames(x)) x$LE_MET <- ac(x$LE_MET) - for(i in c(grep("_KG_",colnames(x)),grep("_EURO_",colnames(x)))) x[,i] <- an(ac(x[,i])) - return(x) -} - - - - +#' Format Eflalo data to ensure that all columns are in right format +#' +#' Reformat all the columns of the Eflalo data to ensure that all data is in +#' the right format +#' +#' +#' @param x eflalo file +#' @return Returns original Eflalo file but now with reformatted data +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{formatTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(eflalo) +#' eflalo <- formatEflalo(eflalo) +#' +#' @export formatEflalo +formatEflalo <- function(x){ + x$VE_REF <- ac(x$VE_REF) + x$VE_FLT <- ac(x$VE_FLT) + x$VE_COU <- ac(x$VE_COU) + x$VE_LEN <- an(ac(x$VE_LEN)) + x$VE_KW <- an(ac(x$VE_KW)) + if("VE_TON" %in% colnames(x)) x$VE_TON <- an(ac(x$VE_TON)) + x$FT_REF <- ac(x$FT_REF) + x$FT_DCOU <- ac(x$FT_DCOU) + x$FT_DHAR <- ac(x$FT_DHAR) + x$FT_DDAT <- ac(x$FT_DDAT) + x$FT_DTIME <- ac(x$FT_DTIME) + x$FT_LCOU <- ac(x$FT_LCOU) + x$FT_LHAR <- ac(x$FT_LHAR) + x$FT_LDAT <- ac(x$FT_LDAT) + x$FT_LTIME <- ac(x$FT_LTIME) + x$LE_ID <- ac(x$LE_ID) + x$LE_CDAT <- ac(x$LE_CDAT) + if("LE_UNIT" %in% colnames(x)) x$LE_UNIT <- ac(x$LE_UNIT) + if("LE_STIME" %in% colnames(x)) x$LE_STIME <- ac(x$LE_STIME) + if("LE_ETIME" %in% colnames(x)) x$LE_ETIME <- ac(x$LE_ETIME) + if("LE_SLAT" %in% colnames(x)) x$LE_SLAT <- an(ac(x$LE_SLAT)) + if("LE_SLON" %in% colnames(x)) x$LE_SLON <- an(ac(x$LE_SLON)) + if("LE_ELAT" %in% colnames(x)) x$LE_ELAT <- an(ac(x$LE_ELAT)) + if("LE_ELON" %in% colnames(x)) x$LE_ELON <- an(ac(x$LE_ELON)) + x$LE_GEAR <- ac(x$LE_GEAR) + x$LE_MSZ <- an(ac(x$LE_MSZ)) + x$LE_RECT <- ac(x$LE_RECT) + x$LE_DIV <- ac(x$LE_DIV) + if("LE_MET" %in% colnames(x)) x$LE_MET <- ac(x$LE_MET) + for(i in c(grep("_KG_",colnames(x)),grep("_EURO_",colnames(x)))) x[,i] <- an(ac(x[,i])) + return(x) +} + + + + diff --git a/vmstools/R/formatTacsat.r b/vmstools/R/formatTacsat.r index ea8d52b..d7b989a 100644 --- a/vmstools/R/formatTacsat.r +++ b/vmstools/R/formatTacsat.r @@ -1,18 +1,35 @@ -formatTacsat <- function(x){ - if("VE_COU" %in% colnames(x)) x$VE_COU <- ac(x$VE_COU) - x$VE_REF <- ac(x$VE_REF) - x$SI_LATI <- anf(x$SI_LATI) - x$SI_LONG <- anf(x$SI_LONG) - x$SI_DATE <- ac(x$SI_DATE) - x$SI_TIME <- ac(x$SI_TIME) - x$SI_SP <- anf(x$SI_SP) - x$SI_HE <- anf(x$SI_HE) - if("SI_HARB" %in% colnames(x)) x$SI_HARB <- ac(x$SI_HARB) - if("SI_STATE" 
%in% colnames(x)) x$SI_STATE <- ac(x$SI_STATE) - if("SI_FT" %in% colnames(x)) x$SI_FT <- ac(x$SI_FT) - #Get rid of NAs in the long and lats - x <- x[!is.na(x$SI_LATI),] - x <- x[!is.na(x$SI_LONG),] - warnings("Those records where longitude and latitude were NA have been removed") - return(x) -} \ No newline at end of file +#' Format tacsat data to ensure that all columns are in right format +#' +#' Reformat all the columns of the tacsat data to ensure that all data is in +#' the right format +#' +#' +#' @param x tacsat file +#' @return Returns original tacsat file but now with reformatted data +#' @author Niels T. Hintzen +#' @seealso \code{\link{formatEflalo}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' tacsat <- formatTacsat(tacsat) +#' +#' @export formatTacsat +formatTacsat <- function(x){ + if("VE_COU" %in% colnames(x)) x$VE_COU <- ac(x$VE_COU) + x$VE_REF <- ac(x$VE_REF) + x$SI_LATI <- anf(x$SI_LATI) + x$SI_LONG <- anf(x$SI_LONG) + x$SI_DATE <- ac(x$SI_DATE) + x$SI_TIME <- ac(x$SI_TIME) + x$SI_SP <- anf(x$SI_SP) + x$SI_HE <- anf(x$SI_HE) + if("SI_HARB" %in% colnames(x)) x$SI_HARB <- ac(x$SI_HARB) + if("SI_STATE" %in% colnames(x)) x$SI_STATE <- ac(x$SI_STATE) + if("SI_FT" %in% colnames(x)) x$SI_FT <- ac(x$SI_FT) + #Get rid of NAs in the long and lats + x <- x[!is.na(x$SI_LATI),] + x <- x[!is.na(x$SI_LONG),] + warnings("Those records where longitude and latitude were NA have been removed") + return(x) +} diff --git a/vmstools/R/getEflaloMetierLevel7.r b/vmstools/R/getEflaloMetierLevel7.r index 68f14f8..0effdf6 100644 --- a/vmstools/R/getEflaloMetierLevel7.r +++ b/vmstools/R/getEflaloMetierLevel7.r @@ -1,140 +1,214 @@ -getEflaloMetierLevel7=function(dat, analysisName, path, critData="EURO", runHACinSpeciesSelection=TRUE, paramTotal=95, paramLogevent=100, critPca="PCA_70", algoClust="CLARA"){ - - #------------------------------------------------------------------------- - # I. 
GETTING THE DATA IN AND CLEANING FOR MISSING AND NEGATIVE VALUES ETC - #------------------------------------------------------------------------- - - # Load the table linking 3A-CODE (FAO CODE of species) to the species assemblage (level 5). - data(correspLevel7to5) - - # Load the table linking mixed metiers (composed by 2 simple metiers) to their official code of mixed metiers level 5 (FAO - 3 characters). - data(correspMixedMetier) - - timeStart=Sys.time() - - print("--- CREATING DIRECTORIES AND REDUCING THE EFLALO DATASET TO THE ONLY DATA NECESSARY FOR THE ANALYSIS ---") - cat("\n") - - # Creating the directory of the analysis - if (!file.exists(analysisName)) dir.create(analysisName) - setwd(file.path(path,analysisName)) - # Delete old R cache - if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) - - eflalo_ori = dat # Keeping this in cached memory for making the final merging at the end - Store(eflalo_ori) - - # ! KEEPING ONLY LE_ID AND THE OUTPUT YOU WANT TO GET (KG/EURO) - dat=dat[,c("LE_ID",grep(critData,names(dat),value=TRUE))] - dat[is.na(dat)]=0 - - # Removing negative and null values - null.value <- vector() - for (i in grep(critData,names(dat))) null.value <- c(null.value,which(dat[,i]<0)) - null.value <- c(null.value,which(apply(dat[,2:ncol(dat)],1,sum,na.rm=TRUE)==0)) - - if(length(null.value)!=0) {LogEvent.removed <- dat[sort(unique(null.value)),] ; dat <- dat[-sort(unique(null.value)),]} - - # Rename species names - names(dat)[-1]=unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) - - # Removing miscellaneous species - dat <- dat[,!names(dat)=="MZZ"] - save(dat, file="dat_cleaned.Rdata") - - - #---------------------------------------------------------------------------------------------------------- - # II. 
EXPLORING THE VARIOUS METHODS FOR IDENTIFYING MAIN SPECIES AND KEEPING THEM IN THE DATA SET (STEP 1) - #---------------------------------------------------------------------------------------------------------- - - print("--- EXPLORING THE DATA FOR SELECTION OF MAIN SPECIES ---") - cat("\n") - - # Exploration of main species - explo=selectMainSpecies(dat,analysisName,RunHAC=runHACinSpeciesSelection,DiagFlag=FALSE) - - # Step 1 : selection of main species - Step1=extractTableMainSpecies(dat,explo$namesMainSpeciesHAC,paramTotal=paramTotal,paramLogevent=paramLogevent) - - save(explo,Step1,file="Explo_Step1.Rdata") - - rowNamesSave <- row.names(Step1) - row.names(Step1) <- 1:nrow(Step1) - - #----------------------------- - # III. STEP 2 - PCA - NO-PCA - #----------------------------- - - if (!file.exists(critPca)) dir.create(critPca) - setwd(file.path(path,analysisName,critPca)) - if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) - - if (critPca=="PCA_70") Step2=getTableAfterPCA(Step1,analysisName,pcaYesNo="pca",criterion="70percents") else # criterion="70percents" - if (critPca=="PCA_SC") Step2=getTableAfterPCA(Step1,analysisName,pcaYesNo="pca",criterion="screetest") else # criterion="screetest" - if (critPca=="NO_PCA") Step2=getTableAfterPCA(Step1,analysisName,pcaYesNo="nopca",criterion=NULL) - - row.names(Step1) <- rowNamesSave - row.names(Step2) <- rowNamesSave - - save(Step1,file="Step1.Rdata") - save(Step2,file="Step2.Rdata") - - #------------------------------------------------------- - # IV. 
STEP 3 - CLUSTERING METHOD : HAC, CLARA OR KMEANS - #------------------------------------------------------- - - if (!file.exists(algoClust)) dir.create(algoClust) - setwd(file.path(path,analysisName,critPca,algoClust)) - if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) - - if (algoClust=="HAC") Step3=getMetierClusters(Step1,Step2,analysisName,methMetier="hac",param1="euclidean",param2="ward") else - if (algoClust=="CLARA") Step3=getMetierClusters(Step1,Step2,analysisName=analysisName,methMetier="clara",param1="euclidean",param2=NULL) else - if (algoClust=="KMEANS") Step3=getMetierClusters(Step1,Step2,analysisName=analysisName,methMetier="kmeans",param1=NULL,param2=NULL) - - save(Step3,file="Step3.Rdata") - - - #------------------------------------------------ - # V. STEP 4 - COMPARISON WITH ORDINATION METHODS - #------------------------------------------------ - - compOrdin="CompOrdin" - if (!file.exists(compOrdin)) dir.create(compOrdin) - setwd(file.path(path,analysisName,critPca,algoClust,compOrdin)) - if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) - - if (algoClust=="HAC") clusters=Step3$clusters - if (algoClust=="CLARA") clusters=Step3$clusters$clustering - if (algoClust=="KMEANS") clusters=Step3$clusters$cluster - - compMetiers=compareToOrdination(dat=dat,Step2=Step2,clusters=clusters,targetSpecies=Step3$targetSpecies) - save(compMetiers,file="compMetiers.Rdata") - - - #------------------------------------- - # VI. 
STEP 5 - MERGING BACK TO EFLALO - #------------------------------------- - - # Choosing the final option - setwd(file.path(path,analysisName)) - - #load(paste(path,analysisName,critPca,algoClust,"Step3.Rdata",sep="/")) - - if(!nrow(dat)==nrow(Step3$LE_ID_clust)) print("--error : number of lines in step 3 not equal to input eflalo, please check!!--") - - dat <- cbind(dat,CLUSTER=Step3$LE_ID_clust[,"clust"]) - - # Now reload the full data set - if(length(null.value)==0){ - eflalo_ori[,"CLUSTER"]=Step3$LE_ID_clust[,"clust"] - }else{ - eflalo_ori[-sort(unique(null.value)),"CLUSTER"]=Step3$LE_ID_clust[,"clust"] - } - - cat("\n") - print("Congratulation !! You have now a fully working eflalo dataset with a metier Level 7 !") - cat("\n") - print(Sys.time()-timeStart) - - return(eflalo_ori) -} \ No newline at end of file +#' Finding Metiers from a full EFLALO dataset, general function calling all the +#' various steps of the analyses at once +#' +#' This function will run the various steps of the multivariate analyses in the +#' right order, allowing the full analysis to be run at once. +#' +#' +#' @param dat a data.frame in eflalo format. It should be previously subsetted +#' for the desired region and gear of interest. +#' @param analysisName character, the name of the run. Used for the file name +#' of the plots. +#' @param path character, the path of the main directory where all data and +#' plots will be stored. A number of sub-directories will be created by the +#' function. +#' @param critData character. Choice if the analysis must be performed on +#' eflalo data in weight ("KG") or in value ("EURO") +#' @param runHACinSpeciesSelection Boolean. the HAC can be time consuming for +#' large datasets, and can therefore be switched off for quicker runs. +#' @param paramTotal numeric. The percentage threshold for the Total method. +#' All species ranked in terms of percentage in total catches and cumulating up +#' to this percentage value will be retained. 
Default value is 95\%. +#' @param paramLogevent numeric. The percentage threshold for the Logevent +#' method. All species representing at least this threshold of at least one +#' logevent (one line) will be retained. Default value is 100\% (logevents +#' catching one and only one species). +#' @param critPca character. Choice if a PCA must be run or not (="NO_PCA"), +#' and if yes, which criterion is used for selecting the number of axes, to be +#' chosen between scree test (="PCA_SC") or 70\% of cumulated inertia +#' (="PCA_70") +#' @param algoClust character. Choice of the clustering algorithm, either +#' "CLARA", "HAC" or "KMEANS". +#' @return +#' +#' The function returns the same eflalo data set as was input, with an +#' additional column giving the cluster name where each logevent has been +#' allocated. +#' +#' Beside, a number of sub-directories are created, with intermediate results +#' and plots. +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @seealso \code{\link{selectMainSpecies}}, +#' \code{\link{extractTableMainSpecies}}, \code{\link{getTableAfterPCA}}, +#' \code{\link{getMetierClusters}}, +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @examples +#' +#' +#' \dontrun{ +#' +#' data(eflalo) +#' +#' eflalo <- formatEflalo(eflalo) +#' +#' eflalo <- eflalo[eflalo$LE_GEAR=="OTB",] +#' +#' # Note that output plots will be sent to getwd() +#' path <- getwd() +#' analysisName <- "metier_analysis_OTB" +#' +#' # Return a fully working eflalo dataset with a metier at DCF Level7 for each +#' # logbook event +#' eflalo_metiers_level7 <- getEflaloMetierLevel7(eflalo, analysisName, path, +#' critData="EURO",runHACinSpeciesSelection=TRUE, paramTotal=95, +#' paramLogevent=100,critPca="PCA_70", algoClust="CLARA") +#' +#' } +#' +#' +#' @export getEflaloMetierLevel7 +getEflaloMetierLevel7=function(dat, analysisName, path, critData="EURO", runHACinSpeciesSelection=TRUE, paramTotal=95, paramLogevent=100, critPca="PCA_70", algoClust="CLARA"){ + + #------------------------------------------------------------------------- + # I. GETTING THE DATA IN AND CLEANING FOR MISSING AND NEGATIVE VALUES ETC + #------------------------------------------------------------------------- + + # Load the table linking 3A-CODE (FAO CODE of species) to the species assemblage (level 5). + data(correspLevel7to5) + + # Load the table linking mixed metiers (composed by 2 simple metiers) to their official code of mixed metiers level 5 (FAO - 3 characters). + data(correspMixedMetier) + + timeStart=Sys.time() + + print("--- CREATING DIRECTORIES AND REDUCING THE EFLALO DATASET TO THE ONLY DATA NECESSARY FOR THE ANALYSIS ---") + cat("\n") + + # Creating the directory of the analysis + if (!file.exists(analysisName)) dir.create(analysisName) + setwd(file.path(path,analysisName)) + # Delete old R cache + if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) + + eflalo_ori = dat # Keeping this in cached memory for making the final merging at the end + Store(eflalo_ori) + + # ! 
KEEPING ONLY LE_ID AND THE OUTPUT YOU WANT TO GET (KG/EURO) + dat=dat[,c("LE_ID",grep(critData,names(dat),value=TRUE))] + dat[is.na(dat)]=0 + + # Removing negative and null values + null.value <- vector() + for (i in grep(critData,names(dat))) null.value <- c(null.value,which(dat[,i]<0)) + null.value <- c(null.value,which(apply(dat[,2:ncol(dat)],1,sum,na.rm=TRUE)==0)) + + if(length(null.value)!=0) {LogEvent.removed <- dat[sort(unique(null.value)),] ; dat <- dat[-sort(unique(null.value)),]} + + # Rename species names + names(dat)[-1]=unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) + + # Removing miscellaneous species + dat <- dat[,!names(dat)=="MZZ"] + save(dat, file="dat_cleaned.Rdata") + + + #---------------------------------------------------------------------------------------------------------- + # II. EXPLORING THE VARIOUS METHODS FOR IDENTIFYING MAIN SPECIES AND KEEPING THEM IN THE DATA SET (STEP 1) + #---------------------------------------------------------------------------------------------------------- + + print("--- EXPLORING THE DATA FOR SELECTION OF MAIN SPECIES ---") + cat("\n") + + # Exploration of main species + explo=selectMainSpecies(dat,analysisName,RunHAC=runHACinSpeciesSelection,DiagFlag=FALSE) + + # Step 1 : selection of main species + Step1=extractTableMainSpecies(dat,explo$namesMainSpeciesHAC,paramTotal=paramTotal,paramLogevent=paramLogevent) + + save(explo,Step1,file="Explo_Step1.Rdata") + + rowNamesSave <- row.names(Step1) + row.names(Step1) <- 1:nrow(Step1) + + #----------------------------- + # III. 
STEP 2 - PCA - NO-PCA + #----------------------------- + + if (!file.exists(critPca)) dir.create(critPca) + setwd(file.path(path,analysisName,critPca)) + if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) + + if (critPca=="PCA_70") Step2=getTableAfterPCA(Step1,analysisName,pcaYesNo="pca",criterion="70percents") else # criterion="70percents" + if (critPca=="PCA_SC") Step2=getTableAfterPCA(Step1,analysisName,pcaYesNo="pca",criterion="screetest") else # criterion="screetest" + if (critPca=="NO_PCA") Step2=getTableAfterPCA(Step1,analysisName,pcaYesNo="nopca",criterion=NULL) + + row.names(Step1) <- rowNamesSave + row.names(Step2) <- rowNamesSave + + save(Step1,file="Step1.Rdata") + save(Step2,file="Step2.Rdata") + + #------------------------------------------------------- + # IV. STEP 3 - CLUSTERING METHOD : HAC, CLARA OR KMEANS + #------------------------------------------------------- + + if (!file.exists(algoClust)) dir.create(algoClust) + setwd(file.path(path,analysisName,critPca,algoClust)) + if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) + + if (algoClust=="HAC") Step3=getMetierClusters(Step1,Step2,analysisName,methMetier="hac",param1="euclidean",param2="ward") else + if (algoClust=="CLARA") Step3=getMetierClusters(Step1,Step2,analysisName=analysisName,methMetier="clara",param1="euclidean",param2=NULL) else + if (algoClust=="KMEANS") Step3=getMetierClusters(Step1,Step2,analysisName=analysisName,methMetier="kmeans",param1=NULL,param2=NULL) + + save(Step3,file="Step3.Rdata") + + + #------------------------------------------------ + # V. 
STEP 4 - COMPARISON WITH ORDINATION METHODS + #------------------------------------------------ + + compOrdin="CompOrdin" + if (!file.exists(compOrdin)) dir.create(compOrdin) + setwd(file.path(path,analysisName,critPca,algoClust,compOrdin)) + if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) + + if (algoClust=="HAC") clusters=Step3$clusters + if (algoClust=="CLARA") clusters=Step3$clusters$clustering + if (algoClust=="KMEANS") clusters=Step3$clusters$cluster + + compMetiers=compareToOrdination(dat=dat,Step2=Step2,clusters=clusters,targetSpecies=Step3$targetSpecies) + save(compMetiers,file="compMetiers.Rdata") + + + #------------------------------------- + # VI. STEP 5 - MERGING BACK TO EFLALO + #------------------------------------- + + # Choosing the final option + setwd(file.path(path,analysisName)) + + #load(paste(path,analysisName,critPca,algoClust,"Step3.Rdata",sep="/")) + + if(!nrow(dat)==nrow(Step3$LE_ID_clust)) print("--error : number of lines in step 3 not equal to input eflalo, please check!!--") + + dat <- cbind(dat,CLUSTER=Step3$LE_ID_clust[,"clust"]) + + # Now reload the full data set + if(length(null.value)==0){ + eflalo_ori[,"CLUSTER"]=Step3$LE_ID_clust[,"clust"] + }else{ + eflalo_ori[-sort(unique(null.value)),"CLUSTER"]=Step3$LE_ID_clust[,"clust"] + } + + cat("\n") + print("Congratulation !! 
You have now a fully working eflalo dataset with a metier Level 7 !") + cat("\n") + print(Sys.time()-timeStart) + + return(eflalo_ori) +} diff --git a/vmstools/R/getMetierClusters.r b/vmstools/R/getMetierClusters.r index 423487e..ab7f2d3 100644 --- a/vmstools/R/getMetierClusters.r +++ b/vmstools/R/getMetierClusters.r @@ -1,1894 +1,2075 @@ -################################################################################ -# STEP 3 OF THE MULTIVARIATE CLASSIFICATION : # -# RUN THE CLUSTERING OF THE LOGEVENTS # -# 4 METHODS ARE AVALAIBLE : HAC / KMEANS / PAM / CLARA # -################################################################################ - - -getMetierClusters = function(datSpecies,datLog,analysisName="",methMetier="clara",param1="euclidean",param2=NULL){ - - # Load the table linking 3A-CODE (FAO CODE of species) to the species assemblage (level 5). - data(correspLevel7to5) - require(lattice) - - LE_ID=rownames(datSpecies) - nbSpec=ncol(datSpecies) - datSpecies=as.matrix(datSpecies,ncol=nbSpec,nrow=length(LE_ID)) - - print("######## STEP 3 CLUSTERING ########") - - t1=Sys.time() - print(paste(" --- selected method :",methMetier, " ---")) - - -######################################################################################################################################## HAC - - if(methMetier=="hac"){ - - classifWithinVar=numeric() - classifBetweenVar=numeric() - classifQuality=numeric() - sampleList=numeric() - mProfilSample=numeric() - classifVarExplain=numeric() - - nbLog=nrow(datLog) - nbDim=ncol(datLog) - - # Center of gravity of datLog - centerOfGravityDatLog=colMeans(datLog) - - # HAC like CLARA (HAC on sample, affectation of each logevent to a cluster, quality of classification, do it 5 times, choose the sample which gives the best quality of classification) - print("hac on subsets...") - - for(i in 1:5){ - - numSample=i - print(paste("sample",i)) - # Sample of size 10000 logevents or 30% of all logevents - 
minsam=min(nbLog,max(10000,round(nbLog*30/100))) - sam=sample(1:nbLog,size=minsam,replace=FALSE) - # Record the 5 samples - sampleList=rbind(sampleList,sam) - outofsam=setdiff(1:nbLog,sam) - sampleDatLog=datLog[sam,] - sampleDatSpecies=datSpecies[sam,] - - # HAC on the sample - log.hac=hcluster(sampleDatLog, method=param1, link=param2) - inerties.vector=log.hac$height[order(log.hac$height,decreasing=TRUE)] - nbClust=which(scree(inerties.vector)[,"epsilon"]<0)[3] - - # Cut the dendogram at the selected level - sampleClusters=cutree(log.hac,k=nbClust) - - Store(objects()) - gc(reset=TRUE) - - # Add the cluster to each logevent of the sample - sampleDatLogWithClusters=cbind(sampleDatLog,sampleClusters) - - sampleClusters=sampleDatLogWithClusters[,ncol(sampleDatLogWithClusters)] - - # Within and between variance of clusters and classification - centerOfGravityClassif=numeric() - withinVarClusters=numeric() - sizeClusti=numeric() - centerOfGravitySampleDatLog=colMeans(sampleDatLog) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravitySampleDatLog) - for(k in 1:nbClust){ # Within variance by cluster - - clusti=sampleDatLogWithClusters[which(sampleClusters==k),1:nbDim] - if(length(which(sampleClusters==k))==1) centerOfGravityClusti=clusti - else centerOfGravityClusti=colMeans(clusti) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) - sizeClusti[k]=length(which(sampleClusters==k)) - if(length(which(sampleClusters==k))==1) withinVarClusters[k]=0 - else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) - - } - # Between variance - classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) - # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification - withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - 
betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - classifVarExplain=c(classifVarExplain,betweenVarClassifOnTot) - - - # Catch profile by cluster for each sample - nbSpec=ncol(datSpecies) - mprofil=numeric() - blank=rep(00,nbSpec) - for(k in 1:nbClust){ - mprofilclusti=colMeans(sampleDatSpecies[which(sampleClusters==k),]) - mprofil=rbind(mprofil,mprofilclusti) - } - mprofil=rbind(mprofil,blank) - - mProfilSample=rbind(mProfilSample,mprofil) - - - # Graphics - - # Calculation of each cluster size - sizeClusters=numeric() - for(k in 1:nbClust){ - sizeClusters[k]=length(which(sampleClusters==k)) - } - - # Compute the test-values for species - resval=test.values(sampleClusters,sampleDatSpecies) - # Determine the target species - target=targetspecies(resval) - - - # Projections on the first factorial plans - png(paste(analysisName,numSample,"Sample_Projections.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(sampleDatLog[,1], sampleDatLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") - if(dim(datLog)[2]>2) { - plot(sampleDatLog[,2], sampleDatLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") - plot(sampleDatLog[,1], sampleDatLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") - if(dim(datLog)[2]>3) { - plot(sampleDatLog[,1], sampleDatLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") - plot(sampleDatLog[,2], sampleDatLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], 
main="Projection of HAC classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") - plot(sampleDatLog[,3], sampleDatLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") - }} - par(op) - dev.off() - - - # Rectangles plotting - png(paste(analysisName,numSample,"Sample_Dendogram.png",sep="_"), width = 1200, height = 800) - plclust(log.hac,labels=FALSE,hang=-1,ann=FALSE) - title(main="HAC dendogram",xlab="Logevents",ylab="Height") - rect.hclust(log.hac, k=nbClust) - dev.off() - - - # Catch profile of the dataset - meanprofile=colMeans(sampleDatSpecies) - png(paste(analysisName,numSample,"Sample_Catch profile of the sample.png",sep="_"), width = 1200, height = 800) - op <- par(las=2) - barplot(meanprofile, main="Catch profile of the sample", xlab="Species", ylab="Percentage of catch") - par(op) - mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - dev.off() - - - # Catch profile by cluster - nbSpec=ncol(sampleDatSpecies) - summarySampleClusters=array(0,dim=c(6,nbSpec,nbClust)) - dimnames(summarySampleClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") - dimnames(summarySampleClusters)[[2]]=names(meanprofile) - dimnames(summarySampleClusters)[[3]]=paste("Cluster",1:nbClust) - for(k in 1:nbClust){ - if(sizeClusters[k]==1){ - summarySampleClusters[,,k]=apply(t(as.matrix(sampleDatSpecies[which(sampleClusters==k),])),2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - }else{ - summarySampleClusters[,,k]=apply(sampleDatSpecies[which(sampleClusters==k),],2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - } - } - # Species names for catch profile 
plots - nameSpPlot=character() - catchMeanThreshold=2 - for(k in 1:nbClust){ - namSpi=names(meanprofile[which(t(summarySampleClusters["Mean",,k])>catchMeanThreshold)]) - numSpi=which(t(summarySampleClusters["Mean",,k])>catchMeanThreshold) - nameSpPloti=rep("",nbSpec) - nameSpPloti[numSpi]=namSpi - nameSpPlot=rbind(nameSpPlot,nameSpPloti) - } - png(paste(analysisName,numSample,"Sample_Mean profile by cluster of the sample.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(k in 1:nbClust){ - op2 <- par(las=2) - barplot(t(summarySampleClusters["Mean",,k]), names.arg=nameSpPlot[k,], xlab="Species", ylab="Percentage of catch", col="gray") - par(op2) - mtext(paste("Cluster",k), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Catch profile by cluster of the sample","\n","\n",sep="")) - dev.off() - - - # Standard deviation profile by cluster - sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) - namSdPlot=character() - SdThreshold=2 - for(k in 1:nbClust){ - if(length(which(sampleClusters==k))==1){ sdprofilclusti=rep(0,nbSpec) - }else{sdprofilclusti=apply(sampleDatSpecies[which(sampleClusters==k),],2,sd)} - namSDi=names(which(sdprofilclusti>SdThreshold)) - numSDi=which(sdprofilclusti>SdThreshold) - namSdPloti=rep("",nbSpec) - namSdPloti[numSDi]=namSDi - sdprofil[k,]=sdprofilclusti - namSdPlot=rbind(namSdPlot,namSdPloti) - } - rownames(sdprofil) <- 1:nrow(sdprofil) - png(paste(analysisName,numSample,"Sample_Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(k in 1:nbClust){ - op2 <- par(las=2) - barplot(sdprofil[k,], names.arg=namSdPlot[k,], xlab="Species", ylab="Percentage of catch") - par(op2) - mtext(paste("Cluster",k), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) - 
dev.off() - - - # Number of Logevents by cluster - x=c(1:nbClust) - png(paste(analysisName,numSample,"Sample_Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) - coord=barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") - barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") - text(coord,sizeClusters+100,sizeClusters,font=2,xpd=NA) - dev.off() - - - # Target Species profiles (test-value) - targetresval=numeric() - nameTargetPlot=character() - for(k in 1:nbClust){ - nomtargeti=as.character(target$tabnomespcib[k,which(!is.na(target$tabnumespcib[k,]))]) - numtargeti=as.numeric(target$tabnumespcib[k,which(!is.na(target$tabnumespcib[k,]))]) - nameTargetPloti=rep("",nbSpec) - nameTargetPloti[numtargeti]=nomtargeti - nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) - targetresvalclusti=rep(0,nbSpec) - targetresvalclusti[numtargeti]=resval[nomtargeti,k] - targetresval=rbind(targetresval,targetresvalclusti) - } - - png(paste(analysisName,numSample,"Sample_Profile of target species by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(k in 1:nbClust){ - op2 <- par(las=2) - barplot(targetresval[k,],names.arg=nameTargetPlot[k,], cex.names=1, xlab="Species", ylab="Test-value") - par(op2) - mtext(paste("Cluster",k), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Profile of target species by cluster","\n","\n",sep="")) - dev.off() - - Store(objects()) - gc(reset=TRUE) - - } # end of for(i in 1:5) - - - - - # Select the sample which gives the smaller classification's quality (the best sample) - sam=sampleList[which.max(classifVarExplain),] - outofsam=setdiff(1:nbLog,sam) - sampleDatLog=datLog[sam,] - - nbLogSample=nrow(sampleDatLog) - nbDim=ncol(sampleDatLog) - - - # HAC with the best sample - 
print("Final HAC") - log.hac=hcluster(sampleDatLog, method=param1, link=param2) - - - # Determine the number of cluster thanks to the scree-test - inerties.vector=log.hac$height[order(log.hac$height,decreasing=TRUE)] - nbClust=which(scree(inerties.vector)[,"epsilon"]<0)[3] - - # Cut the dendogram at the selected level - sampleClusters=cutree(log.hac,k=nbClust) - sampleClusters=as.factor(sampleClusters) - - sampleDatLogWithClusters=data.frame() - sampleDatLogWithClusters=cbind(sampleDatLog,sampleClusters) - sampleDatLogWithClusters=as.data.frame(sampleDatLogWithClusters) - - # Discriminante analysis on the learning dataset - learning=lda(sampleClusters~.,data=sampleDatLogWithClusters) - - otherLog=datLog[outofsam,] - otherLog=as.data.frame(otherLog) - - # Predict the cluster for the other logevent - pred=predict(learning,otherLog) - otherDatLogWithClusters=cbind(otherLog,pred$class) - colnames(otherDatLogWithClusters)=colnames(sampleDatLogWithClusters) - - # Rebuilt complete datLog with clusters - clusters=numeric(length=nbLog) - clusters[sam]=sampleClusters - clusters[outofsam]=pred$class - - - # Within and between variance of clusters and classification - centerOfGravityClassif=numeric() - withinVarClusters=numeric() - sizeClusti=numeric() - centerOfGravityDatLog=colMeans(datLog) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) - for(k in 1:nbClust){ # Within variance by cluster - - clusti=datLog[which(clusters==k),1:nbDim] - if(length(which(clusters==k))==1) centerOfGravityClusti=clusti - else centerOfGravityClusti=colMeans(clusti) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) - sizeClusti[k]=length(which(clusters==k)) - if(length(which(clusters==k))==1) withinVarClusters[k]=0 - else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) - - } - # Between variance - classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) - # Within 
variance of clusters on totale variance (pourcent) and between variance on totale variance of classification - withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - - - # Calculation of each cluster size - n=nrow(datLog) - sizeClusters=numeric() - for(k in 1:nbClust){ - sizeClusters[k]=length(which(clusters==k)) - } - - # Compute the test-values for species - resval=test.values(clusters,datSpecies) - # Determine the target species - target=targetspecies(resval) - rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) - - - # Compute the percentage of logevents catching each species by cluster - mainSpecies=colnames(datSpecies) - percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) - for(i in 1:nbClust){ - percLogevents[i,]=round(sapply(mainSpecies,function(x) (sizeClusters[i]-length(which(datSpecies[clusters==i,x]==0)))/sizeClusters[i]*100),digits=1) - } - - - # Projections on the first factorial plans - png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") - if(dim(datLog)[2]>2) { - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") - plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") - if(dim(datLog)[2]>3) { - plot(datLog[,1], datLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of 
HAC classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") - plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") - plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") - }} - par(op) - dev.off() - - # For a paper - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_HAC',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_HAC',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="", xlab="Axis 2", ylab="Axis 3") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_2_3_HAC',sep="_"), type='png') - dev.off() - - - # Rectangles plotting - png(paste(analysisName,"Dendogram.png",sep="_"), width = 1200, height = 800) - plclust(log.hac,labels=FALSE,hang=-1,ann=FALSE) - title(main="HAC dendogram",xlab="Logevents",ylab="Height") - rect.hclust(log.hac, k=nbClust) - dev.off() - - - # Catch profile of the dataset - meanprofile=colMeans(datSpecies) - png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) - op <- par(las=2) - barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") - par(op) - mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, 
line=0.5, col="darkblue") - dev.off() - - - # Catch profile by cluster - nbSpec=ncol(datSpecies) - summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) - dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") - dimnames(summaryClusters)[[2]]=names(meanprofile) - dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - if(sizeClusters[i]==1){ - summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters==i),])),2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - }else{ - summaryClusters[,,i]=apply(datSpecies[which(clusters==i),],2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - } - } - # Species names for catch profile plots - nameSpPlot=character() - catchMeanThreshold=2 - for(i in 1:nbClust){ - namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) - numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) - nameSpPloti=rep("",nbSpec) - nameSpPloti[numSpi]=namSpi - nameSpPlot=rbind(nameSpPlot,nameSpPloti) - } - png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Catch profile by cluster","\n","\n",sep="")) - dev.off() - - - # For a paper : levelplot - X11(4,4) - mat <- t(summaryClusters["Mean",,]) - #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", - # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] - 
rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] - sp <- apply(mat,2, function(x) length(which(x>20))) - sp2=sp[which(sp>=1)] - #colnames(mat)[sp==0] <- "" - mat2=mat[,which(colnames(mat)%in%names(sp2))] - cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") - print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) - savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') - dev.off() - - - # Standard deviation profile by cluster - sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) - namSdPlot=character() - SdThreshold=2 - for(i in 1:nbClust){ - if(length(which(clusters==i))==1){ sdprofilclusti=rep(0,nbSpec) - }else{sdprofilclusti=apply(datSpecies[which(clusters==i),],2,sd)} - namSDi=names(which(sdprofilclusti>SdThreshold)) - numSDi=which(sdprofilclusti>SdThreshold) - namSdPloti=rep("",nbSpec) - namSdPloti[numSDi]=namSDi - sdprofil[i,]=sdprofilclusti - namSdPlot=rbind(namSdPlot,namSdPloti) - } - rownames(sdprofil) <- 1:nrow(sdprofil) - png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) - dev.off() - - - # Number of Logevents by cluster - x=c(1:nbClust) - png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) - coord=barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of 
Logevents") - barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") - text(coord,sizeClusters+400,sizeClusters,font=2,xpd=NA) - dev.off() - - - # Profile of test-values by cluster - targetresval=numeric() - nameTargetPlot=character() - for(i in 1:nbClust){ - nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - nameTargetPloti=rep("",nbSpec) - nameTargetPloti[numtargeti]=nomtargeti - nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) - targetresvalclusti=rep(0,nbSpec) - targetresvalclusti[numtargeti]=resval[nomtargeti,i] - targetresval=rbind(targetresval,targetresvalclusti) - } - - png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(targetresval[i,],names.arg=nameTargetPlot[i,], cex.names=1, xlab="Species", ylab="Test-value") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) - dev.off() - - - - - # Descriptive tables of the clusters - clusterDesc=matrix(0,nrow=9,ncol=nbClust) - for(i in 1:nbClust){ - clusterDesc[,i]=c(i, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, - length(which(t(summaryClusters["Median",,i])>50)), - length(which(resval[,i]>1.96)), - length(which(resval[,i]>3.29)), - length(which(apply(datSpecies,2,function (x) (sizeClusters[i]-length(which(x[clusters==i]==0)))/sizeClusters[i]*100)>50)), - length(which(apply(datSpecies,2,function (x) 
(sizeClusters[i]-length(which(x[clusters==i]==0)))/sizeClusters[i]*100)>90)), - sizeClusters[i]) - } - rownames(clusterDesc)=c("Number of species", - "to have 50% of catch", "to have 90% of catch", - "with a median higher than 50", - "with a test-value > 1.96", "with a test-value > 3.29", - "catch in 50% of the logevents", "catch in 90% of the logevents", - "Clusters size") - colnames(clusterDesc)=1:nbClust - clusterDesc2=as.data.frame(clusterDesc) - - - # Summary tables of the clusters - namesSpecies=matrix(NA,nrow=nbClust,ncol=10) - namesCapt=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) - namesTarget=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) - tabLibname=matrix(NA,nrow=nbClust,ncol=10) - listLibname=list() - - for(i in 1:nbClust){ - namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] - a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) - colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] - if(length(a)!=0){ - namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) - } - namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) - } - - for(i in 1:nbClust){ - listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) - tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) - } - - tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) - tabTestVal=matrix(NA,nrow=nbClust,ncol=10) - tabPropLog=matrix(NA,nrow=nbClust,ncol=10) - - for(i in 1:nbClust){ - 
print("-----------------------------------------------------------------") - print(paste("Cluster",i)) - propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabPropCatch[i,1:length(propCatch)]=propCatch - print(propCatch) - testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabTestVal[i,1:length(testVal)]=testVal - print(testVal) - propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (sizeClusters[i]-length(which(datSpecies[clusters==i,x]==0)))/sizeClusters[i]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (sizeClusters[i]-length(which(datSpecies[clusters==i,x]==0)))/sizeClusters[i]*100),digits=1)>=0.1)] - tabPropLog[i,1:length(propLog)]=propLog - print(propLog) - } - - tabClusters=array(0,dim=c(10,5,nbClust)) - dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") - dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) - } - - sizeTabClusters=numeric() - for(i in 1:nbClust){ - sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) - } - - - # Target Species - # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch - # - Test-value > thresholdTestValue - # - % Logevents > thresholdLogevents - thresholdCatch=75 - thresholdTestValue=3 - thresholdLogevents=30 - - sppCumCatch=list() - sppTestValue=list() - 
sppLogevents=list() - targetSpeciesByCluster=list() - - for (i in 1:nbClust){ - percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) - nbSpSel=length(which(percCatchCum<thresholdCatch))+1 - sppCumCatch[[i]]=tabClusters[1:nbSpSel,"FAO",i] - - sppTestValue[[i]]=tabClusters[which(as.numeric(tabClusters[,"Test-value",i])>thresholdTestValue),"FAO",i] - - sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] - - targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) - targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) - } - - # List of metiers (level 7) - listMetiersL7=list() - for (i in 1:nbClust){ - metiersClusteri=targetSpeciesByCluster[[i]] - metiersClusteri=as.character(unique(unlist(metiersClusteri))) - metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") - listMetiersL7[[i]]=metiersClusteri - } - - # Metier (level 7) of each logevent - metierByLogeventL7=unlist(sapply(clusters,function(x) listMetiersL7[[x]])) - - - - # Create csv tables - write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") - - dfClust=data.frame() - dfClust=paste("Clust ",1:nbClust,sep="") - for(i in 1:nbClust){ - write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) - write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - } - - - LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) - print(" --- end of step 3 ---") - print(Sys.time()-t1) - - return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, sizeClusters=sizeClusters, - betweenVarClassifOnTot=betweenVarClassifOnTot, mProfilSample=mProfilSample, - nbClust=nbClust, summaryClusters=summaryClusters, testValues=resval, - testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, - descClusters=clusterDesc2, tabClusters=tabClusters, - targetSpecies=targetSpeciesByCluster)) - - } else - - - - - 
-######################################################################################################################################## KMEANS - - if(methMetier=="kmeans"){ - # Calculation of optimal k thanks to within variance - nbLog=nrow(datLog) - - varintra=numeric() - for (k in 2:15){ - clustersKmeans=kmeans(datLog, k, iter.max=60, nstart=10) - varintra[k]=1/nbLog*sum(clustersKmeans$withinss) - } - nbClust=which(scree(varintra)[,"epsilon"]<0)[3] - - - png(paste(analysisName,"Within variance of the classification.png",sep="_"), width = 1200, height = 800) - plot(varintra,main="Within clusters variance",xlab="Number of clusters",ylab="Within Variance") - dev.off() - - Store(objects()) - gc(reset=TRUE) - - # KMEANS with k optimal - clusters=kmeans(datLog, nbClust, iter.max=60, nstart=10, algorithm="Hartigan-Wong") - - - # Within and between variance of clusters and classification - centerOfGravityClassif=numeric() - withinVarClusters=numeric() - sizeClusti=numeric() - centerOfGravityDatLog=colMeans(datLog) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) - for(k in 1:nbClust){ # Within variance by cluster - - clusti=datLog[which(clusters$cluster==k),] - if(length(which(clusters$cluster==k))==1) centerOfGravityClusti=clusti - else centerOfGravityClusti=colMeans(clusti) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) - sizeClusti[k]=length(which(clusters$cluster==k)) - if(length(which(clusters$cluster==k))==1) withinVarClusters[k]=0 - else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) - - } - # Between variance - classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) - # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification - withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - 
betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - - - # Compute the test-values for species - resval=test.values(clusters$cluster,datSpecies) - # Determine the target species - target=targetspecies(resval) - rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) - - - # Compute the percentage of logevents catching each species by cluster - mainSpecies=colnames(datSpecies) - percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) - for(i in 1:nbClust){ - percLogevents[i,]=round(sapply(mainSpecies,function(x) (clusters$size[i]-length(which(datSpecies[clusters$cluster==i,x]==0)))/clusters$size[i]*100),digits=1) - } - - - # Projections on the first factorial plans - png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") - if(dim(datLog)[2]>2) { - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") - plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") - if(dim(datLog)[2]>3) { - plot(datLog[,1], datLog[,4], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") - plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") - plot(datLog[,3], datLog[,4], 
pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") - }} - par(op) - dev.off() - - # For a paper - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_Kmeans',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_Kmeans',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="", xlab="Axis 2", ylab="Axis 3") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_2_3_Kmeans',sep="_"), type='png') - dev.off() - - - # Catch profile of the dataset - meanprofile=colMeans(datSpecies) - png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) - op <- par(las=2) - barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") - par(op) - mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - dev.off() - - - # Catch profile by cluster - nbSpec=ncol(datSpecies) - summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) - dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") - dimnames(summaryClusters)[[2]]=names(meanprofile) - dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - if(clusters$size[i]==1){ - summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters$cluster==i),])),2, - function(x) 
rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - }else{ - summaryClusters[,,i]=apply(datSpecies[which(clusters$cluster==i),],2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - } - } - # Species names for catch profile plots - nameSpPlot=character() - catchMeanThreshold=2 - for(i in 1:nbClust){ - namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) - numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) - nameSpPloti=rep("",nbSpec) - nameSpPloti[numSpi]=namSpi - nameSpPlot=rbind(nameSpPlot,nameSpPloti) - } - png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Catch profile by cluster","\n","\n",sep="")) - dev.off() - - - # For a paper : levelplot - X11(4,4) - mat <- t(summaryClusters["Mean",,]) - #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", - # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] - rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] - sp <- apply(mat,2, function(x) length(which(x>20))) - sp2=sp[which(sp>=1)] - #colnames(mat)[sp==0] <- "" - mat2=mat[,which(colnames(mat)%in%names(sp2))] - cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") - print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), 
colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) - savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') - dev.off() - - - # Standard deviation profile by cluster - sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) - namSdPlot=character() - SdThreshold=2 - for(i in 1:nbClust){ - if(length(which(clusters$cluster==i))==1){ sdprofilclusti=rep(0,nbSpec) - }else{sdprofilclusti=apply(datSpecies[which(clusters$cluster==i),],2,sd)} - namSDi=names(which(sdprofilclusti>SdThreshold)) - numSDi=which(sdprofilclusti>SdThreshold) - namSdPloti=rep("",nbSpec) - namSdPloti[numSDi]=namSDi - sdprofil[i,]=sdprofilclusti - namSdPlot=rbind(namSdPlot,namSdPloti) - } - rownames(sdprofil) <- 1:nrow(sdprofil) - png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) - dev.off() - - - # Number of Logevents by cluster - x=c(1:nbClust) - png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) - coord=barplot(clusters$size, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") - barplot(clusters$size, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") - text(coord,clusters$size+400,clusters$size,font=2,xpd=NA) - dev.off() - - - # Profile of test-values by cluster - targetresval=numeric() - nameTargetPlot=character() - for(i in 1:nbClust){ - nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - 
numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - nameTargetPloti=rep("",nbSpec) - nameTargetPloti[numtargeti]=nomtargeti - nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) - targetresvalclusti=rep(0,nbSpec) - targetresvalclusti[numtargeti]=resval[nomtargeti,i] - targetresval=rbind(targetresval,targetresvalclusti) - } - - png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(targetresval[i,], cex.names=1, names.arg=nameTargetPlot[i,], xlab="Species", ylab="Test-value") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) - dev.off() - - - - - # Descriptive tables of the clusters - clusterDesc=matrix(0,nrow=9,ncol=nbClust) - for(i in 1:nbClust){ - clusterDesc[,i]=c(i, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, - length(which(t(summaryClusters["Median",,i])>50)), - length(which(resval[,i]>1.96)), - length(which(resval[,i]>3.29)), - length(which(apply(datSpecies,2,function (x) (clusters$size[i]-length(which(x[clusters$cluster==i]==0)))/clusters$size[i]*100)>50)), - length(which(apply(datSpecies,2,function (x) (clusters$size[i]-length(which(x[clusters$cluster==i]==0)))/clusters$size[i]*100)>90)), - clusters$size[i]) - } - rownames(clusterDesc)=c("Number of species", - "to have 50% of catch", "to have 90% of catch", - "with a median higher than 50", - "with a test-value > 1.96", "with a test-value > 3.29", - "catch in 50% of the logevents", "catch in 90% of the logevents", - "Clusters size") - colnames(clusterDesc)=1:nbClust - 
clusterDesc2=as.data.frame(clusterDesc) - - - # Summary tables of the clusters - namesSpecies=matrix(NA,nrow=nbClust,ncol=10) - namesCapt=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) - namesTarget=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) - tabLibname=matrix(NA,nrow=nbClust,ncol=10) - listLibname=list() - - for(i in 1:nbClust){ - namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] - a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) - colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] - if(length(a)!=0){ - namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) - } - namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) - } - - for(i in 1:nbClust){ - listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) - tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) - } - - tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) - tabTestVal=matrix(NA,nrow=nbClust,ncol=10) - tabPropLog=matrix(NA,nrow=nbClust,ncol=10) - - for(i in 1:nbClust){ - print("-----------------------------------------------------------------") - print(paste("Cluster",i)) - propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabPropCatch[i,1:length(propCatch)]=propCatch - print(propCatch) - 
testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabTestVal[i,1:length(testVal)]=testVal - print(testVal) - propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$size[i]-length(which(datSpecies[clusters$cluster==i,x]==0)))/clusters$size[i]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$size[i]-length(which(datSpecies[clusters$cluster==i,x]==0)))/clusters$size[i]*100),digits=1)>=0.1)] - tabPropLog[i,1:length(propLog)]=propLog - print(propLog) - } - - tabClusters=array(0,dim=c(10,5,nbClust)) - dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") - dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) - } - - sizeTabClusters=numeric() - for(i in 1:nbClust){ - sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) - } - - - # Target Species - # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch - # - Test-value > thresholdTestValue - # - % Logevents > thresholdLogevents - thresholdCatch=75 - thresholdTestValue=3 - thresholdLogevents=30 - - sppCumCatch=list() - sppTestValue=list() - sppLogevents=list() - targetSpeciesByCluster=list() - - for (i in 1:nbClust){ - percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) - nbSpSel=length(which(percCatchCum<thresholdCatch))+1 - sppCumCatch[[i]]=tabClusters[1:nbSpSel,"FAO",i] - - sppTestValue[[i]]=tabClusters[which(as.numeric(tabClusters[,"Test-value",i])>thresholdTestValue),"FAO",i] - - sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] - - targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) - 
targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) - } - - # List of metiers (level 7) - listMetiersL7=list() - for (i in 1:nbClust){ - metiersClusteri=targetSpeciesByCluster[[i]] - metiersClusteri=as.character(unique(unlist(metiersClusteri))) - metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") - listMetiersL7[[i]]=metiersClusteri - } - - # Metier (level 7) of each logevent - metierByLogeventL7=unlist(sapply(clusters$cluster,function(x) listMetiersL7[[x]])) - - - - # Create csv tables - write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") - - dfClust=data.frame() - dfClust=paste("Clust ",1:nbClust,sep="") - for(i in 1:nbClust){ - write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) - write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - } - - - LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) - print(" --- end of step 3 ---") - print(Sys.time()-t1) - - return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, - betweenVarClassifOnTot=betweenVarClassifOnTot, nbClust=nbClust, - summaryClusters=summaryClusters, testValues=resval, - testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, - descClusters=clusterDesc2, tabClusters=tabClusters, - targetSpecies=targetSpeciesByCluster)) - - } else - - - - - -######################################################################################################################################## PAM - - if(methMetier=="pam"){ - - # Calculation of optimal k thanks to the silhouette (second maximum) - nbLog=nrow(datLog) - - clustersPam.silcoeff=numeric() - clustersPam.silcoeff[1]=0 - clustersPam.silcoeff[2]=0 - clustersPam.silcoeff[3]=0 - - k=2 - compMax=1 - repeat{ - k=k+2 - print(k) - clustersPam=pam(datLog,k) - clustersPam.silcoeff[k]=clustersPam$silinfo$avg.width - clustersPam=pam(datLog,k+1) - 
clustersPam.silcoeff[k+1]=clustersPam$silinfo$avg.width - if((clustersPam.silcoeff[k-2]<clustersPam.silcoeff[k-1]) & (clustersPam.silcoeff[k-1]>clustersPam.silcoeff[k]) & compMax<=2){ - if(compMax==2){ - nbClust=k-1 - print(paste("2e max =",k-1)) - print(paste("nbClust =",nbClust)) - break - } else { - compMax=compMax+1 - print(paste("compMax1 =",compMax)) - print(paste("1er max =",k-1)) - } - } - if((clustersPam.silcoeff[k-1]<clustersPam.silcoeff[k]) & (clustersPam.silcoeff[k]>clustersPam.silcoeff[k+1]) & compMax<=2){ - if(compMax==2){ - nbClust=k - print(paste("2e max =",k)) - print(paste("nbClust =",nbClust)) - break - } else { - compMax=compMax+1 - print(paste("compMax2 =",compMax)) - print(paste("1er max =",k)) - } - } - Store(objects()) - gc(reset=TRUE) - } - - png(paste(analysisName,"Silhouette of the classification.png",sep="_"), width = 1200, height = 800) - plot(clustersPam.silcoeff, main="Silhouette of the classification", xlab="Number of clusters", ylab="Silhouette") # k optimal corresponds to second maximum of silhouette's coefficients - dev.off() - - Store(objects()) - gc(reset=TRUE) - - cat("PamSilCoeff",clustersPam.silcoeff,"\n") - - # PAM with optimal k - clusters=pam(datLog,nbClust) - summary(clusters) - - - # Within and between variance of clusters and classification - centerOfGravityClassif=numeric() - withinVarClusters=numeric() - sizeClusti=numeric() - centerOfGravityDatLog=colMeans(datLog) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) - for(k in 1:nbClust){ # Within variance by cluster - - clusti=datLog[which(clusters$clustering==k),] - if(length(which(clusters$clustering==k))==1) centerOfGravityClusti=clusti - else centerOfGravityClusti=colMeans(clusti) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) - sizeClusti[k]=length(which(clusters$clustering==k)) - if(length(which(clusters$clustering==k))==1) withinVarClusters[k]=0 - else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) - - } - # Between variance - 
classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) - # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification - withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - - - # Compute the test-values for species - resval=test.values(clusters$clustering,datSpecies) - # Determine the target species - target=targetspecies(resval) - nbClust=length(clusters$id.med) - rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) - - - # Compute the percentage of logevents catching each species by cluster - mainSpecies=colnames(datSpecies) - percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) - for(i in 1:nbClust){ - percLogevents[i,]=round(sapply(mainSpecies,function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1) - } - - - # Projections on the first factorial plans - png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") - if(dim(datLog)[2]>2) { - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") - plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") - if(dim(datLog)[2]>3) { - plot(datLog[,1], datLog[,4], pch=21, 
bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") - plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") - plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") - }} - par(op) - dev.off() - - - # For a paper - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_PAM',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_PAM',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 2", ylab="Axis 3") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_2_3_PAM',sep="_"), type='png') - dev.off() - - - # Catch profile of the dataset - meanprofile=colMeans(datSpecies) - png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) - op <- par(las=2) - barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") - par(op) - mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - dev.off() - - - # Catch profile by cluster - nbSpec=ncol(datSpecies) - 
summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) - dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") - dimnames(summaryClusters)[[2]]=names(meanprofile) - dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - if(clusters$clusinfo[i,1]==1){ - summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters$clustering==i),])),2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - }else{ - summaryClusters[,,i]=apply(datSpecies[which(clusters$clustering==i),],2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - } - } - # Species names for catch profile plots - nameSpPlot=character() - catchMeanThreshold=2 - for(i in 1:nbClust){ - namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) - numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) - nameSpPloti=rep("",nbSpec) - nameSpPloti[numSpi]=namSpi - nameSpPlot=rbind(nameSpPlot,nameSpPloti) - } - png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Catch profile by cluster","\n","\n",sep="")) - dev.off() - - - # For a paper : levelplot - X11(4,4) - mat <- t(summaryClusters["Mean",,]) - #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", - # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] - rownames(mat) <- 
c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] - sp <- apply(mat,2, function(x) length(which(x>20))) - sp2=sp[which(sp>=1)] - #colnames(mat)[sp==0] <- "" - mat2=mat[,which(colnames(mat)%in%names(sp2))] - cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") - print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) - savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') - dev.off() - - - # Standard deviation profile by cluster - sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) - namSdPlot=character() - SdThreshold=2 - for(i in 1:nbClust){ - if(length(which(clusters$clustering==i))==1){ sdprofilclusti=rep(0,nbSpec) - }else{sdprofilclusti=apply(datSpecies[which(clusters$clustering==i),],2,sd)} - namSDi=names(which(sdprofilclusti>SdThreshold)) - numSDi=which(sdprofilclusti>SdThreshold) - namSdPloti=rep("",nbSpec) - namSdPloti[numSDi]=namSDi - sdprofil[i,]=sdprofilclusti - namSdPlot=rbind(namSdPlot,namSdPloti) - } - rownames(sdprofil) <- 1:nrow(sdprofil) - png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) - dev.off() - - - # Number of Logevents by cluster - x=c(1:nbClust) - png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) - coord=barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", 
ylab="Number of Logevents") - barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") - text(coord,clusters$clusinfo[,1]+200,clusters$clusinfo[,1],font=2,xpd=NA) - dev.off() - - - # Profile of test-values by cluster - targetresval=numeric() - nameTargetPlot=character() - for(i in 1:nbClust){ - nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - nameTargetPloti=rep("",nbSpec) - nameTargetPloti[numtargeti]=nomtargeti - nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) - targetresvalclusti=rep(0,nbSpec) - targetresvalclusti[numtargeti]=resval[nomtargeti,i] - targetresval=rbind(targetresval,targetresvalclusti) - } - - png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(targetresval[i,],names.arg=nameTargetPlot[i,], xlab="Species", ylab="Test-value") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) - dev.off() - - - - # Descriptive tables of the clusters - clusterDesc=matrix(0,nrow=9,ncol=nbClust) - for(i in 1:nbClust){ - clusterDesc[,i]=c(i, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, - length(which(t(summaryClusters["Median",,i])>50)), - length(which(resval[,i]>1.96)), - length(which(resval[,i]>3.29)), - length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>50)), - 
length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>90)), - clusters$clusinfo[i,1]) - } - rownames(clusterDesc)=c("Number of species", - "to have 50% of catch", "to have 90% of catch", - "with a median higher than 50", - "with a test-value > 1.96", "with a test-value > 3.29", - "catch in 50% of the logevents", "catch in 90% of the logevents", - "Clusters size") - colnames(clusterDesc)=1:nbClust - clusterDesc2=as.data.frame(clusterDesc) - - - # Summary tables of the clusters - namesSpecies=matrix(NA,nrow=nbClust,ncol=10) - namesCapt=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) - namesTarget=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) - tabLibname=matrix(NA,nrow=nbClust,ncol=10) - listLibname=list() - - for(i in 1:nbClust){ - namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] - a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) - colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] - if(length(a)!=0){ - namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) - } - namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) - } - - for(i in 1:nbClust){ - listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) - tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) - } - - tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) - tabTestVal=matrix(NA,nrow=nbClust,ncol=10) - 
tabPropLog=matrix(NA,nrow=nbClust,ncol=10) - - for(i in 1:nbClust){ - print("-----------------------------------------------------------------") - print(paste("Cluster",i)) - propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabPropCatch[i,1:length(propCatch)]=propCatch - print(propCatch) - testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabTestVal[i,1:length(testVal)]=testVal - print(testVal) - propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)>=0.1)] - tabPropLog[i,1:length(propLog)]=propLog - print(propLog) - } - - tabClusters=array(0,dim=c(10,5,nbClust)) - dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") - dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) - } - - sizeTabClusters=numeric() - for(i in 1:nbClust){ - sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) - } - - - # Target Species - # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch - # - Test-value > thresholdTestValue - # - % Logevents > 
thresholdLogevents - thresholdCatch=75 - thresholdTestValue=3 - thresholdLogevents=30 - - sppCumCatch=list() - sppTestValue=list() - sppLogevents=list() - targetSpeciesByCluster=list() - - for (i in 1:nbClust){ - percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) - nbSpSel=length(which(percCatchCumthresholdTestValue),"FAO",i] - - sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] - - targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) - targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) - } - - # List of metiers (level 7) - listMetiersL7=list() - for (i in 1:nbClust){ - metiersClusteri=targetSpeciesByCluster[[i]] - metiersClusteri=as.character(unique(unlist(metiersClusteri))) - metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") - listMetiersL7[[i]]=metiersClusteri - } - - # Metier (level 7) of each logevent - metierByLogeventL7=unlist(sapply(clusters$clustering,function(x) listMetiersL7[[x]])) - - - # Create csv tables - write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") - - dfClust=data.frame() - dfClust=paste("Clust ",1:nbClust,sep="") - for(i in 1:nbClust){ - write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) - write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - } - - - LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) - print(" --- end of step 3 ---") - print(Sys.time()-t1) - - return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, - betweenVarClassifOnTot=betweenVarClassifOnTot, nbClust=nbClust, - summaryClusters=summaryClusters, testValues=resval, - testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, - descClusters=clusterDesc2, tabClusters=tabClusters, - targetSpecies=targetSpeciesByCluster)) - - } else - - - - - 
-######################################################################################################################################## CLARA - - if(methMetier=="clara"){ - nbLog=nrow(datLog) - propSample=0.1 - - # Calculation of optimal k thanks to the silhouette (second maximum) - clustersClara.silcoeff=numeric() - clustersClara.silcoeff[1]=0 - clustersClara.silcoeff[2]=0 - clustersClara.silcoeff[3]=0 - k=2 - compMax=1 - repeat{ - k=k+2 - print(k) - clustersClara=clara(datLog, k, metric=param1, stand=FALSE, samples=5, sampsize=min(nbLog,round(propSample*nbLog+10*k))) - clustersClara.silcoeff[k]=clustersClara$silinfo$avg.width - clustersClara=clara(datLog, k+1, metric=param1, stand=FALSE, samples=5, sampsize=min(nbLog,round(propSample*nbLog+10*(k+1)))) - clustersClara.silcoeff[k+1]=clustersClara$silinfo$avg.width - if((clustersClara.silcoeff[k-2]clustersClara.silcoeff[k]) & compMax<=2){ - if(compMax==2){ - nbClust=k-1 - print(paste("2e max =",k-1)) - print(paste("nbClust =",nbClust)) - break - } else { - compMax=compMax+1 - print(paste("compMax1 =",compMax)) - print(paste("1er max =",k-1)) - } - } - if((clustersClara.silcoeff[k-1]clustersClara.silcoeff[k+1]) & compMax<=2){ - if(compMax==2){ - nbClust=k - print(paste("2e max =",k)) - print(paste("nbClust =",nbClust)) - break - } else { - compMax=compMax+1 - print(paste("compMax2 =",compMax)) - print(paste("1er max =",k)) - } - } - Store(objects()) - gc(reset=TRUE) - } - - - png(paste(analysisName,"Silhouette of the classification.png",sep="_"), width = 1200, height = 800) - plot(clustersClara.silcoeff, main="Silhouette of the classification", xlab="Number of clusters", ylab="Silhouette") # k optimal corresponds to maximum of silhouette's coefficients - dev.off() - - Store(objects()) - gc(reset=TRUE) - - cat("ClaraSilCoeff",clustersClara.silcoeff,"\n") - - - # CLARA with optimal k - clusters=clara(datLog, nbClust, metric=param1, stand=FALSE, samples=5, sampsize=min(nbLog,round(propSample*nbLog+10*nbClust))) # 
CLARA with optimal k - summary(clusters) - - - # Within and between variance of clusters and classification - centerOfGravityClassif=numeric() - withinVarClusters=numeric() - sizeClusti=numeric() - centerOfGravityDatLog=colMeans(datLog) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) - for(k in 1:nbClust){ # Within variance by cluster - - clusti=datLog[which(clusters$clustering==k),] - if(length(which(clusters$clustering==k))==1) centerOfGravityClusti=clusti - else centerOfGravityClusti=colMeans(clusti) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) - sizeClusti[k]=length(which(clusters$clustering==k)) - if(length(which(clusters$clustering==k))==1) withinVarClusters[k]=0 - else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) - - } - # Between variance - classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) - # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification - withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - - - # Compute the test-values for species - resval=test.values(clusters$cluster,datSpecies) - # Determine the target species - target=targetspecies(resval) - rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) - - - # Compute the percentage of logevents catching each species by cluster - mainSpecies=colnames(datSpecies) - percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) - for(i in 1:nbClust){ - percLogevents[i,]=round(sapply(mainSpecies,function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1) - } - - - # Projections on the first factorial plans - 
png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") - if(dim(datLog)[2]>2) { - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") - plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") - if(dim(datLog)[2]>3) { - plot(datLog[,1], datLog[,4], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") - plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") - plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") - }} - par(op) - dev.off() - - - # For a paper - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_CLARA',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - 
savePlot(filename=paste(analysisName,'projections_1_2_CLARA',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 2", ylab="Axis 3") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_2_3_CLARA',sep="_"), type='png') - dev.off() - - # Catch profile of the dataset - meanprofile=colMeans(datSpecies) - png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) - op <- par(las=2) - barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") - par(op) - mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - dev.off() - - - # Catch profile by cluster - nbSpec=ncol(datSpecies) - summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) - dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") - dimnames(summaryClusters)[[2]]=names(meanprofile) - dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - if(clusters$clusinfo[i,1]==1){ - summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters$clustering==i),])),2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - }else{ - summaryClusters[,,i]=apply(datSpecies[which(clusters$clustering==i),],2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - } - } - # Species names for catch profile plots - nameSpPlot=character() - catchMeanThreshold=2 - for(i in 1:nbClust){ - namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) - numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) - nameSpPloti=rep("",nbSpec) - nameSpPloti[numSpi]=namSpi 
- nameSpPlot=rbind(nameSpPlot,nameSpPloti) - } - # Plot - png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Catch profile by cluster","\n","\n",sep="")) - dev.off() - - - # For a paper : levelplot - X11(4,4) - mat <- t(summaryClusters["Mean",,]) - #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", - # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] - rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] - sp <- apply(mat,2, function(x) length(which(x>20))) - sp2=sp[which(sp>=1)] - #colnames(mat)[sp==0] <- "" - mat2=mat[,which(colnames(mat)%in%names(sp2))] - cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") - print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) - savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') - dev.off() - - # OR # - mat <- t(summaryClusters["Mean",,]) - #mat=mat[,order(colnames(mat),decreasing=TRUE)] # si on veut mettre les esp�ces par ordre descendant - rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] - sp <- apply(mat,2, function(x) length(which(x>20))) - colnames(mat)[sp==0] <- "" - cc <- colorRampPalette(c("white", "steelblue2", "blue4"),space = "rgb", interpolate="spline") - png(filename = 
paste(paste(analysisName,'mean_profile_by_cluster_levelplot_blue',sep="_"),".png",sep=""), width = 400, height = 800) - print(levelplot(mat, cuts=4, aspect=3, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.8), colorkey=list(space="right", at=c(0,20,40,60,80,100)))) - dev.off() - - - # Standard deviation profile by cluster - sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) - namSdPlot=character() - SdThreshold=5 - for(i in 1:nbClust){ - if(length(which(clusters$clustering==i))==1){ sdprofilclusti=rep(0,nbSpec) - }else{sdprofilclusti=apply(datSpecies[which(clusters$clustering==i),],2,sd)} - namSDi=names(which(sdprofilclusti>SdThreshold)) - numSDi=which(sdprofilclusti>SdThreshold) - namSdPloti=rep("",nbSpec) - namSdPloti[numSDi]=namSDi - sdprofil[i,]=sdprofilclusti - namSdPlot=rbind(namSdPlot,namSdPloti) - } - rownames(sdprofil) <- 1:nrow(sdprofil) - png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) - dev.off() - - - # Number of Logevents by cluster - x=c(1:nbClust) - png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) - coord=barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") - barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") - text(coord,clusters$clusinfo[,1]+5,clusters$clusinfo[,1],font=2,xpd=NA) - dev.off() - - - # Profile of test-values by cluster - 
targetresval=matrix(0,nrow=nbClust,ncol=nbSpec) - colnames(targetresval)=colnames(datSpecies) - rownames(targetresval)=1:nbClust - nameTargetPlot=matrix(NA,nrow=nbClust,ncol=nbSpec) - for(i in 1:nbClust){ - nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - nameTargetPlot[i,numtargeti]=nomtargeti - targetresval[i,numtargeti]=resval[nomtargeti,i] - } - - png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(targetresval[i,],names.arg=nameTargetPlot[i,], xlab="Species", ylab="Test-value") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) - dev.off() - - - - # Descriptive tables of the clusters - clusterDesc=matrix(0,nrow=9,ncol=nbClust) - for(i in 1:nbClust){ - clusterDesc[,i]=c(i, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, - length(which(t(summaryClusters["Median",,i])>50)), - length(which(resval[,i]>1.96)), - length(which(resval[,i]>3.29)), - length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>50)), - length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>90)), - clusters$clusinfo[i,1]) - } - rownames(clusterDesc)=c("Number of species", - "to have 50% of catch", "to have 90% of catch", - "with a median higher than 50", - "with a test-value > 1.96", "with a test-value > 3.29", - "catch in 50% 
of the logevents", "catch in 90% of the logevents", - "Clusters size") - colnames(clusterDesc)=1:nbClust - clusterDesc2=as.data.frame(clusterDesc) - - - # Summary tables of the clusters - namesSpecies=matrix(NA,nrow=nbClust,ncol=10) - namesCapt=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) - namesTarget=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) - tabLibname=matrix(NA,nrow=nbClust,ncol=10) - listLibname=list() - - for(i in 1:nbClust){ - namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] - a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) - colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] - if(length(a)!=0){ - namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) - } - namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) - } - - for(i in 1:nbClust){ - listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) - tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) - } - - tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) - tabTestVal=matrix(NA,nrow=nbClust,ncol=10) - tabPropLog=matrix(NA,nrow=nbClust,ncol=10) - - for(i in 1:nbClust){ - print("-----------------------------------------------------------------") - print(paste("Cluster",i)) - propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] 
- tabPropCatch[i,1:length(propCatch)]=propCatch - print(propCatch) - testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabTestVal[i,1:length(testVal)]=testVal - print(testVal) - propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)>=0.1)] - tabPropLog[i,1:length(propLog)]=propLog - print(propLog) - } - - tabClusters=array(0,dim=c(10,5,nbClust)) - dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") - dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) - } - - sizeTabClusters=numeric() - for(i in 1:nbClust){ - sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) - } - - - # Target Species - # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch - # - Test-value > thresholdTestValue - # - % Logevents > thresholdLogevents - thresholdCatch=75 - thresholdTestValue=3 - thresholdLogevents=30 - - sppCumCatch=list() - sppTestValue=list() - sppLogevents=list() - targetSpeciesByCluster=list() - - for (i in 1:nbClust){ - percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) - nbSpSel=length(which(percCatchCumthresholdTestValue),"FAO",i] - - sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% 
Logevents",i])>thresholdLogevents),"FAO",i] - - targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) - targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) - } - - # List of metiers (level 7) - listMetiersL7=list() - for (i in 1:nbClust){ - metiersClusteri=targetSpeciesByCluster[[i]] - metiersClusteri=as.character(unique(unlist(metiersClusteri))) - metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") - listMetiersL7[[i]]=metiersClusteri - } - - # Metier (level 7) of each logevent - metierByLogeventL7=unlist(sapply(clusters$clustering,function(x) listMetiersL7[[x]])) - - - - # Create csv tables - write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") - - dfClust=data.frame() - dfClust=paste("Clust ",1:nbClust,sep="") - for(i in 1:nbClust){ - write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) - write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - } - - - LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) - print(" --- end of step 3 ---") - print(Sys.time()-t1) - - return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, - betweenVarClassifOnTot=betweenVarClassifOnTot, nbClust=nbClust, - summaryClusters=summaryClusters, testValues=resval, - testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, - descClusters=clusterDesc2, tabClusters=tabClusters, - targetSpecies=targetSpeciesByCluster)) - - } else stop("methMetier must be hac, kmeans, pam or clara") - # end of the methods - - -} # end of the function "getMetierClusters" - +################################################################################ +# STEP 3 OF THE MULTIVARIATE CLASSIFICATION : # +# RUN THE CLUSTERING OF THE LOGEVENTS # +# 4 METHODS ARE AVALAIBLE : HAC / KMEANS / PAM / CLARA # +################################################################################ + 
+ + + +#' Finding metiers from a reduced EFLALO dataset, step 3: clustering logevents +#' using various multivariate methods +#' +#' This function represents the third step in the workflow processing logbooks +#' data for identifying metiers. +#' +#' This step allows applying various clustering analyses on the data sets +#' coming out of the first and second step. All methods will lead to a +#' classification of all individuals (logevents), but they differ in their +#' nature and then consequently in their outcomes. The four methods available +#' are - Hierarchical Ascending Classification (HAC), with user-defined method +#' for estimating distances and link for aggregating individuals\cr - +#' K-Means,\cr - Partitioning Around Medoids (PAM),\cr - Clustering LARge +#' Applications (CLARA). +#' +#' The HAC method works by calculating the distance between individuals using +#' the method selected with param1 ("euclidean", "maximum", "manhattan", +#' "canberra", "binary" or "minkowski") and aggregating them based on the +#' distance between clusters, using the link selected with param2 ("ward", +#' "single", "complete", "average", "mcquitty", "median" or "centroid"). In +#' HAC, the number of classes is determined afterwards, once all combinations +#' have been calculated, by using the objective criteria of scree test which +#' detects the third marginal loss of inertia between two consecutive numbers +#' of classes. Therefore, the computing time and memory request for this method +#' can be quite comprehensive, and may reach memory limits on standard PC when +#' operating with very large datasets. +#' +#' The K-Means method works by randomly choosing k individuals, or kernels (k +#' corresponding to the final number of classes), and then affecting each +#' individuals of the dataset to the closest kernel. Each time, the gravity +#' center of the class is recalculated, thus reinitialising the calculation of +#' distances to the next individual. 
In order to define the most appropriate +#' number of classes, this procedure is repeated with differents values for k, +#' from 2 to 15. The final number of classes is identified by using the +#' criteria of scree test which detects the third marginal loss of inertia +#' between two consecutive numbers of classes. +#' +#' The PAM method works slightly around the same principle, starting with the +#' initialisation of k medoids. The medoid is the individual in a class which +#' shows least dissimilarity with other individuals in the same class, and the +#' remaining individuals are affected to their closest medoid. Then the sum of +#' dissimilarities is calculated and compared with the sum of dissimilarities +#' if any other individual in the class had been playing the role of the +#' medoid, and then the medoid is eventually adjusted accordingly, until full +#' stabilisation of the procedure. The most appropriate number of classes is +#' identified by using the estimated silhouette of the classification for each +#' value of k. The silhouette represent an average comparison of the distance +#' between an individual and the other individuals from its class, and between +#' the same individual and the other individuals from the next closest class, +#' and is therefore an objective measurement of the quality of the +#' classification. The final number of classes retained is the one for which +#' the second maximum of the silhouettes is reached. It is to be noted that the +#' PAM method is not designed for working with very large datasets (>10 000 +#' lines), and may quickly reach memory limits. +#' +#' The CLARA method is an extension of the PAM algorithm aiming at working with +#' large datasets, and is therefore more efficient and powerful than PAM. It +#' works by sampling 5 subsets of the dataset for each value of k and running +#' the PAM algorithm explained above on these subsets, and then to keep only +#' the subset giving the best classification (silhouette). 
As in PAM, the final +#' number of classes retained is the one for which the second maximum of the +#' silhouettes is reached. Afterwards, all remaining individuals are affected +#' to their closest medoid, using user-defined method (param1) for calculating +#' distances ('euclidean' being used as the default, but 'manhattan' could also +#' be used). +#' +#' +#' @param datSpecies numerical matrix with logevents as rows and species as +#' columns, with percentage values (between 0 and 100) of each species in the +#' logevent catches. Logevent ID (LE_ID) should be as row names. Typically, +#' this input table will be produced from the step 1 of the metier analysis +#' applied on the eflalo initial data, using the function +#' extractTableMainSpecies() +#' @param datLog numerical matrix with logevents as rows, and values to be used +#' for calculating distances between individuals as columns. Typically, this +#' input table is produced by the step 2 of the metier analysis, using the +#' function getTableAfterPCA(). If a PCA was run, selected Principal Components +#' will appear as columns. If no PCA was run, the matrix will be the same as +#' datSpecies, with percentage values by species. +#' @param analysisName character, the name of the run. Used for the file name +#' of the plots. +#' @param methMetier character. The name of the clustering method to be used. +#' Must be chosen between "hac", "kmeans", "pam" and "clara". +#' @param param1 character. Parameter used for chosing the method calculating +#' distances between individuals, to be used in HAC and CLARA algorithms. For +#' HAC, it can be chosen between "euclidean", "maximum", "manhattan", +#' "canberra", "binary" or "minkowski". For CLARA, between "euclidean" and +#' "manhattan". For PAM and K-means, this must be set to NULL. +#' @param param2 character. Parameter used for chosing the method calculating +#' distances between clusters, to be used in HAC algorithm. 
For HAC, it can be +#' chosen between "ward", "single", "complete", "average", "mcquitty", "median" +#' or "centroid". For PAM, CLARA and K-means, this must be set to NULL. +#' @return For any of the four methods used, a number of graphs are produced +#' and saved directly in the working directory. They describe 1) projections +#' of results on factorial plans, 2) the mean profile of the whole dataset, +#' i.e. the average percentage of the various species in a logevent across all +#' individuals, 3) the mean and standard deviation profile in terms of average +#' percentage of catch by species within each cluster, 4) the number of +#' logevents by clusters, and 5) the profile of test-values by cluster. +#' +#' Finally, the function returns a list with a number of results and +#' diagnostics on the performance of the method: \item{LE_ID_clust}{a data +#' frame with two columns, linking the initial ID name of the Logevent (LE_ID) +#' with the cluster metier where the ID has been allocated. } +#' \item{clusters}{diagnostics of the clustering process. It may vary between +#' the four methods. } \item{betweenVarClassifOnTot}{percentage of variance +#' explained by the classification. } \item{nbClust}{final number of clusters +#' retained. } \item{summaryClusters}{array documenting, for each cluster, the +#' minimum, mean, maximum, as well as the 25\%, 50\% and 75\% quantiles values +#' of the percentage of catch by species for the individual logevents in the +#' cluster. } \item{testValues}{matrix of test-values by species and cluster. +#' The test-value measures for each species the difference between the average +#' percentage of catches in the cluster compared to the average percentage of +#' catch in the total dataset, thus large positive values (>1.98) will point +#' out the most characteristic species in the clusters. 
} +#' \item{testValuesSpecies}{a tabulated list ranking the most characteristic +#' species in the clusters (ordering species with a test-value > 1.98 by +#' decreasing order of test-value). } \item{percLogevents}{a matrix giving the +#' percentage of logevents catching each species by cluster. } +#' \item{descClusters}{a data frame giving some descriptive statistics for each +#' cluster, like cluster size, number of species needed to have at least 50\% +#' of the cluster's total catch, number of species with a test-value > 1.98 in +#' the cluster, number of species caught in at least 50\% of the logevents, +#' etc... } \item{tabClusters}{a 3d-array giving a table summarizing for each +#' cluster the most important species (in terms of catch), the associated +#' test-value, and the percentage of logevents of the cluster catching these +#' species. } \item{targetSpecies}{a list giving the target species by +#' cluster. } +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @seealso \code{\link{getEflaloMetierLevel7}}, +#' \code{\link{selectMainSpecies}}, \code{\link{extractTableMainSpecies}}, +#' \code{\link{getMetierClusters}}, \code{\link{getTableAfterPCA}} +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @examples +#' +#' +#' \dontrun{ +#' +#' data(eflalo) +#' +#' eflalo <- formatEflalo(eflalo) +#' +#' eflalo <- eflalo[eflalo$LE_GEAR=="OTB",] +#' # note that output plots will be sent to getwd() +#' analysisName <- "metier_analysis_OTB" +#' +#' dat <- eflalo[,c("LE_ID",grep("EURO",colnames(eflalo),value=TRUE))] +#' names(dat)[-1] <- unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) +#' +#' explo <- selectMainSpecies(dat, analysisName, RunHAC=TRUE, DiagFlag=FALSE) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' Step1 <- extractTableMainSpecies(dat, explo$NamesMainSpeciesHAC, +#' paramTotal=95, paramLogevent=100) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' rowNamesSave <- row.names(Step1) +#' row.names(Step1) <- 1:nrow(Step1) +#' +#' # Run a PCA +#' Step2 <- getTableAfterPCA(Step1, analysisName, pcaYesNo="pca", +#' criterion="70percents") +#' +#' row.names(Step1) <- rowNamesSave +#' row.names(Step2) <- rowNamesSave +#' +#' # Define a metier for each logevent running the CLARA algorithm +#' Step3 <- getMetierClusters(Step1, Step2, analysisName, +#' methMetier="clara", param1="euclidean", param2=NULL) +#' +#' } +#' +#' +#' @export getMetierClusters +getMetierClusters = function(datSpecies,datLog,analysisName="",methMetier="clara",param1="euclidean",param2=NULL){ + + # Load the table linking 3A-CODE (FAO CODE of species) to the species assemblage (level 5). 
+ data(correspLevel7to5) + require(lattice) + + LE_ID=rownames(datSpecies) + nbSpec=ncol(datSpecies) + datSpecies=as.matrix(datSpecies,ncol=nbSpec,nrow=length(LE_ID)) + + print("######## STEP 3 CLUSTERING ########") + + t1=Sys.time() + print(paste(" --- selected method :",methMetier, " ---")) + + +######################################################################################################################################## HAC + + if(methMetier=="hac"){ + + classifWithinVar=numeric() + classifBetweenVar=numeric() + classifQuality=numeric() + sampleList=numeric() + mProfilSample=numeric() + classifVarExplain=numeric() + + nbLog=nrow(datLog) + nbDim=ncol(datLog) + + # Center of gravity of datLog + centerOfGravityDatLog=colMeans(datLog) + + # HAC like CLARA (HAC on sample, affectation of each logevent to a cluster, quality of classification, do it 5 times, choose the sample which gives the best quality of classification) + print("hac on subsets...") + + for(i in 1:5){ + + numSample=i + print(paste("sample",i)) + # Sample of size 10000 logevents or 30% of all logevents + minsam=min(nbLog,max(10000,round(nbLog*30/100))) + sam=sample(1:nbLog,size=minsam,replace=FALSE) + # Record the 5 samples + sampleList=rbind(sampleList,sam) + outofsam=setdiff(1:nbLog,sam) + sampleDatLog=datLog[sam,] + sampleDatSpecies=datSpecies[sam,] + + # HAC on the sample + log.hac=hcluster(sampleDatLog, method=param1, link=param2) + inerties.vector=log.hac$height[order(log.hac$height,decreasing=TRUE)] + nbClust=which(scree(inerties.vector)[,"epsilon"]<0)[3] + + # Cut the dendogram at the selected level + sampleClusters=cutree(log.hac,k=nbClust) + + Store(objects()) + gc(reset=TRUE) + + # Add the cluster to each logevent of the sample + sampleDatLogWithClusters=cbind(sampleDatLog,sampleClusters) + + sampleClusters=sampleDatLogWithClusters[,ncol(sampleDatLogWithClusters)] + + # Within and between variance of clusters and classification + centerOfGravityClassif=numeric() + 
withinVarClusters=numeric() + sizeClusti=numeric() + centerOfGravitySampleDatLog=colMeans(sampleDatLog) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravitySampleDatLog) + for(k in 1:nbClust){ # Within variance by cluster + + clusti=sampleDatLogWithClusters[which(sampleClusters==k),1:nbDim] + if(length(which(sampleClusters==k))==1) centerOfGravityClusti=clusti + else centerOfGravityClusti=colMeans(clusti) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) + sizeClusti[k]=length(which(sampleClusters==k)) + if(length(which(sampleClusters==k))==1) withinVarClusters[k]=0 + else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) + + } + # Between variance + classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) + # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification + withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + classifVarExplain=c(classifVarExplain,betweenVarClassifOnTot) + + + # Catch profile by cluster for each sample + nbSpec=ncol(datSpecies) + mprofil=numeric() + blank=rep(00,nbSpec) + for(k in 1:nbClust){ + mprofilclusti=colMeans(sampleDatSpecies[which(sampleClusters==k),]) + mprofil=rbind(mprofil,mprofilclusti) + } + mprofil=rbind(mprofil,blank) + + mProfilSample=rbind(mProfilSample,mprofil) + + + # Graphics + + # Calculation of each cluster size + sizeClusters=numeric() + for(k in 1:nbClust){ + sizeClusters[k]=length(which(sampleClusters==k)) + } + + # Compute the test-values for species + resval=test.values(sampleClusters,sampleDatSpecies) + # Determine the target species + target=targetspecies(resval) + + + # Projections on the first factorial plans + png(paste(analysisName,numSample,"Sample_Projections.png",sep="_"), 
width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(sampleDatLog[,1], sampleDatLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") + if(dim(datLog)[2]>2) { + plot(sampleDatLog[,2], sampleDatLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") + plot(sampleDatLog[,1], sampleDatLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") + if(dim(datLog)[2]>3) { + plot(sampleDatLog[,1], sampleDatLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") + plot(sampleDatLog[,2], sampleDatLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") + plot(sampleDatLog[,3], sampleDatLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") + }} + par(op) + dev.off() + + + # Rectangles plotting + png(paste(analysisName,numSample,"Sample_Dendogram.png",sep="_"), width = 1200, height = 800) + plclust(log.hac,labels=FALSE,hang=-1,ann=FALSE) + title(main="HAC dendogram",xlab="Logevents",ylab="Height") + rect.hclust(log.hac, k=nbClust) + dev.off() + + + # Catch profile of the dataset + meanprofile=colMeans(sampleDatSpecies) + png(paste(analysisName,numSample,"Sample_Catch profile of the sample.png",sep="_"), width = 1200, height = 800) + op <- par(las=2) + barplot(meanprofile, main="Catch profile of the sample", xlab="Species", ylab="Percentage of catch") + par(op) + 
mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + dev.off() + + + # Catch profile by cluster + nbSpec=ncol(sampleDatSpecies) + summarySampleClusters=array(0,dim=c(6,nbSpec,nbClust)) + dimnames(summarySampleClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") + dimnames(summarySampleClusters)[[2]]=names(meanprofile) + dimnames(summarySampleClusters)[[3]]=paste("Cluster",1:nbClust) + for(k in 1:nbClust){ + if(sizeClusters[k]==1){ + summarySampleClusters[,,k]=apply(t(as.matrix(sampleDatSpecies[which(sampleClusters==k),])),2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + }else{ + summarySampleClusters[,,k]=apply(sampleDatSpecies[which(sampleClusters==k),],2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + } + } + # Species names for catch profile plots + nameSpPlot=character() + catchMeanThreshold=2 + for(k in 1:nbClust){ + namSpi=names(meanprofile[which(t(summarySampleClusters["Mean",,k])>catchMeanThreshold)]) + numSpi=which(t(summarySampleClusters["Mean",,k])>catchMeanThreshold) + nameSpPloti=rep("",nbSpec) + nameSpPloti[numSpi]=namSpi + nameSpPlot=rbind(nameSpPlot,nameSpPloti) + } + png(paste(analysisName,numSample,"Sample_Mean profile by cluster of the sample.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(k in 1:nbClust){ + op2 <- par(las=2) + barplot(t(summarySampleClusters["Mean",,k]), names.arg=nameSpPlot[k,], xlab="Species", ylab="Percentage of catch", col="gray") + par(op2) + mtext(paste("Cluster",k), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Catch profile by cluster of the sample","\n","\n",sep="")) + dev.off() + + + # Standard deviation profile by 
cluster + sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) + namSdPlot=character() + SdThreshold=2 + for(k in 1:nbClust){ + if(length(which(sampleClusters==k))==1){ sdprofilclusti=rep(0,nbSpec) + }else{sdprofilclusti=apply(sampleDatSpecies[which(sampleClusters==k),],2,sd)} + namSDi=names(which(sdprofilclusti>SdThreshold)) + numSDi=which(sdprofilclusti>SdThreshold) + namSdPloti=rep("",nbSpec) + namSdPloti[numSDi]=namSDi + sdprofil[k,]=sdprofilclusti + namSdPlot=rbind(namSdPlot,namSdPloti) + } + rownames(sdprofil) <- 1:nrow(sdprofil) + png(paste(analysisName,numSample,"Sample_Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(k in 1:nbClust){ + op2 <- par(las=2) + barplot(sdprofil[k,], names.arg=namSdPlot[k,], xlab="Species", ylab="Percentage of catch") + par(op2) + mtext(paste("Cluster",k), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) + dev.off() + + + # Number of Logevents by cluster + x=c(1:nbClust) + png(paste(analysisName,numSample,"Sample_Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) + coord=barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") + barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") + text(coord,sizeClusters+100,sizeClusters,font=2,xpd=NA) + dev.off() + + + # Target Species profiles (test-value) + targetresval=numeric() + nameTargetPlot=character() + for(k in 1:nbClust){ + nomtargeti=as.character(target$tabnomespcib[k,which(!is.na(target$tabnumespcib[k,]))]) + numtargeti=as.numeric(target$tabnumespcib[k,which(!is.na(target$tabnumespcib[k,]))]) + nameTargetPloti=rep("",nbSpec) + nameTargetPloti[numtargeti]=nomtargeti + nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) + 
targetresvalclusti=rep(0,nbSpec) + targetresvalclusti[numtargeti]=resval[nomtargeti,k] + targetresval=rbind(targetresval,targetresvalclusti) + } + + png(paste(analysisName,numSample,"Sample_Profile of target species by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(k in 1:nbClust){ + op2 <- par(las=2) + barplot(targetresval[k,],names.arg=nameTargetPlot[k,], cex.names=1, xlab="Species", ylab="Test-value") + par(op2) + mtext(paste("Cluster",k), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Profile of target species by cluster","\n","\n",sep="")) + dev.off() + + Store(objects()) + gc(reset=TRUE) + + } # end of for(i in 1:5) + + + + + # Select the sample which gives the smaller classification's quality (the best sample) + sam=sampleList[which.max(classifVarExplain),] + outofsam=setdiff(1:nbLog,sam) + sampleDatLog=datLog[sam,] + + nbLogSample=nrow(sampleDatLog) + nbDim=ncol(sampleDatLog) + + + # HAC with the best sample + print("Final HAC") + log.hac=hcluster(sampleDatLog, method=param1, link=param2) + + + # Determine the number of cluster thanks to the scree-test + inerties.vector=log.hac$height[order(log.hac$height,decreasing=TRUE)] + nbClust=which(scree(inerties.vector)[,"epsilon"]<0)[3] + + # Cut the dendogram at the selected level + sampleClusters=cutree(log.hac,k=nbClust) + sampleClusters=as.factor(sampleClusters) + + sampleDatLogWithClusters=data.frame() + sampleDatLogWithClusters=cbind(sampleDatLog,sampleClusters) + sampleDatLogWithClusters=as.data.frame(sampleDatLogWithClusters) + + # Discriminante analysis on the learning dataset + learning=lda(sampleClusters~.,data=sampleDatLogWithClusters) + + otherLog=datLog[outofsam,] + otherLog=as.data.frame(otherLog) + + # Predict the cluster for the other logevent + pred=predict(learning,otherLog) + otherDatLogWithClusters=cbind(otherLog,pred$class) + 
colnames(otherDatLogWithClusters)=colnames(sampleDatLogWithClusters) + + # Rebuilt complete datLog with clusters + clusters=numeric(length=nbLog) + clusters[sam]=sampleClusters + clusters[outofsam]=pred$class + + + # Within and between variance of clusters and classification + centerOfGravityClassif=numeric() + withinVarClusters=numeric() + sizeClusti=numeric() + centerOfGravityDatLog=colMeans(datLog) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) + for(k in 1:nbClust){ # Within variance by cluster + + clusti=datLog[which(clusters==k),1:nbDim] + if(length(which(clusters==k))==1) centerOfGravityClusti=clusti + else centerOfGravityClusti=colMeans(clusti) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) + sizeClusti[k]=length(which(clusters==k)) + if(length(which(clusters==k))==1) withinVarClusters[k]=0 + else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) + + } + # Between variance + classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) + # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification + withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + + + # Calculation of each cluster size + n=nrow(datLog) + sizeClusters=numeric() + for(k in 1:nbClust){ + sizeClusters[k]=length(which(clusters==k)) + } + + # Compute the test-values for species + resval=test.values(clusters,datSpecies) + # Determine the target species + target=targetspecies(resval) + rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) + + + # Compute the percentage of logevents catching each species by cluster + mainSpecies=colnames(datSpecies) + percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster 
",1:nbClust,sep=""),mainSpecies)) + for(i in 1:nbClust){ + percLogevents[i,]=round(sapply(mainSpecies,function(x) (sizeClusters[i]-length(which(datSpecies[clusters==i,x]==0)))/sizeClusters[i]*100),digits=1) + } + + + # Projections on the first factorial plans + png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") + if(dim(datLog)[2]>2) { + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") + plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") + if(dim(datLog)[2]>3) { + plot(datLog[,1], datLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") + plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") + plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") + }} + par(op) + dev.off() + + # For a paper + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_HAC',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, 
bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_HAC',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="", xlab="Axis 2", ylab="Axis 3") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_2_3_HAC',sep="_"), type='png') + dev.off() + + + # Rectangles plotting + png(paste(analysisName,"Dendogram.png",sep="_"), width = 1200, height = 800) + plclust(log.hac,labels=FALSE,hang=-1,ann=FALSE) + title(main="HAC dendogram",xlab="Logevents",ylab="Height") + rect.hclust(log.hac, k=nbClust) + dev.off() + + + # Catch profile of the dataset + meanprofile=colMeans(datSpecies) + png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) + op <- par(las=2) + barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") + par(op) + mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + dev.off() + + + # Catch profile by cluster + nbSpec=ncol(datSpecies) + summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) + dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") + dimnames(summaryClusters)[[2]]=names(meanprofile) + dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + if(sizeClusters[i]==1){ + summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters==i),])),2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + }else{ + summaryClusters[,,i]=apply(datSpecies[which(clusters==i),],2, + function(x) 
rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + } + } + # Species names for catch profile plots + nameSpPlot=character() + catchMeanThreshold=2 + for(i in 1:nbClust){ + namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) + numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) + nameSpPloti=rep("",nbSpec) + nameSpPloti[numSpi]=namSpi + nameSpPlot=rbind(nameSpPlot,nameSpPloti) + } + png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Catch profile by cluster","\n","\n",sep="")) + dev.off() + + + # For a paper : levelplot + X11(4,4) + mat <- t(summaryClusters["Mean",,]) + #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", + # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] + rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] + sp <- apply(mat,2, function(x) length(which(x>20))) + sp2=sp[which(sp>=1)] + #colnames(mat)[sp==0] <- "" + mat2=mat[,which(colnames(mat)%in%names(sp2))] + cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") + print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) + savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') + dev.off() + + + # Standard deviation profile by cluster + 
sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) + namSdPlot=character() + SdThreshold=2 + for(i in 1:nbClust){ + if(length(which(clusters==i))==1){ sdprofilclusti=rep(0,nbSpec) + }else{sdprofilclusti=apply(datSpecies[which(clusters==i),],2,sd)} + namSDi=names(which(sdprofilclusti>SdThreshold)) + numSDi=which(sdprofilclusti>SdThreshold) + namSdPloti=rep("",nbSpec) + namSdPloti[numSDi]=namSDi + sdprofil[i,]=sdprofilclusti + namSdPlot=rbind(namSdPlot,namSdPloti) + } + rownames(sdprofil) <- 1:nrow(sdprofil) + png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) + dev.off() + + + # Number of Logevents by cluster + x=c(1:nbClust) + png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) + coord=barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") + barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") + text(coord,sizeClusters+400,sizeClusters,font=2,xpd=NA) + dev.off() + + + # Profile of test-values by cluster + targetresval=numeric() + nameTargetPlot=character() + for(i in 1:nbClust){ + nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + nameTargetPloti=rep("",nbSpec) + nameTargetPloti[numtargeti]=nomtargeti + nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) + targetresvalclusti=rep(0,nbSpec) + 
targetresvalclusti[numtargeti]=resval[nomtargeti,i] + targetresval=rbind(targetresval,targetresvalclusti) + } + + png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(targetresval[i,],names.arg=nameTargetPlot[i,], cex.names=1, xlab="Species", ylab="Test-value") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) + dev.off() + + + + + # Descriptive tables of the clusters + clusterDesc=matrix(0,nrow=9,ncol=nbClust) + for(i in 1:nbClust){ + clusterDesc[,i]=c(i, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, + length(which(t(summaryClusters["Median",,i])>50)), + length(which(resval[,i]>1.96)), + length(which(resval[,i]>3.29)), + length(which(apply(datSpecies,2,function (x) (sizeClusters[i]-length(which(x[clusters==i]==0)))/sizeClusters[i]*100)>50)), + length(which(apply(datSpecies,2,function (x) (sizeClusters[i]-length(which(x[clusters==i]==0)))/sizeClusters[i]*100)>90)), + sizeClusters[i]) + } + rownames(clusterDesc)=c("Number of species", + "to have 50% of catch", "to have 90% of catch", + "with a median higher than 50", + "with a test-value > 1.96", "with a test-value > 3.29", + "catch in 50% of the logevents", "catch in 90% of the logevents", + "Clusters size") + colnames(clusterDesc)=1:nbClust + clusterDesc2=as.data.frame(clusterDesc) + + + # Summary tables of the clusters + namesSpecies=matrix(NA,nrow=nbClust,ncol=10) + namesCapt=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) + namesTarget=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesVT = 
min(5,dim(target$tabnomespcib)[[2]]) + tabLibname=matrix(NA,nrow=nbClust,ncol=10) + listLibname=list() + + for(i in 1:nbClust){ + namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] + a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) + colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] + if(length(a)!=0){ + namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) + } + namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) + } + + for(i in 1:nbClust){ + listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) + tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) + } + + tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) + tabTestVal=matrix(NA,nrow=nbClust,ncol=10) + tabPropLog=matrix(NA,nrow=nbClust,ncol=10) + + for(i in 1:nbClust){ + print("-----------------------------------------------------------------") + print(paste("Cluster",i)) + propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabPropCatch[i,1:length(propCatch)]=propCatch + print(propCatch) + testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabTestVal[i,1:length(testVal)]=testVal + print(testVal) + 
propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (sizeClusters[i]-length(which(datSpecies[clusters==i,x]==0)))/sizeClusters[i]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (sizeClusters[i]-length(which(datSpecies[clusters==i,x]==0)))/sizeClusters[i]*100),digits=1)>=0.1)] + tabPropLog[i,1:length(propLog)]=propLog + print(propLog) + } + + tabClusters=array(0,dim=c(10,5,nbClust)) + dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") + dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) + } + + sizeTabClusters=numeric() + for(i in 1:nbClust){ + sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) + } + + + # Target Species + # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch + # - Test-value > thresholdTestValue + # - % Logevents > thresholdLogevents + thresholdCatch=75 + thresholdTestValue=3 + thresholdLogevents=30 + + sppCumCatch=list() + sppTestValue=list() + sppLogevents=list() + targetSpeciesByCluster=list() + + for (i in 1:nbClust){ + percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) + nbSpSel=length(which(percCatchCumthresholdTestValue),"FAO",i] + + sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] + + targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) + targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) + } + + # List of metiers (level 7) + listMetiersL7=list() + for (i in 1:nbClust){ + metiersClusteri=targetSpeciesByCluster[[i]] + metiersClusteri=as.character(unique(unlist(metiersClusteri))) + 
metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") + listMetiersL7[[i]]=metiersClusteri + } + + # Metier (level 7) of each logevent + metierByLogeventL7=unlist(sapply(clusters,function(x) listMetiersL7[[x]])) + + + + # Create csv tables + write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") + + dfClust=data.frame() + dfClust=paste("Clust ",1:nbClust,sep="") + for(i in 1:nbClust){ + write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) + write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + } + + + LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) + print(" --- end of step 3 ---") + print(Sys.time()-t1) + + return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, sizeClusters=sizeClusters, + betweenVarClassifOnTot=betweenVarClassifOnTot, mProfilSample=mProfilSample, + nbClust=nbClust, summaryClusters=summaryClusters, testValues=resval, + testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, + descClusters=clusterDesc2, tabClusters=tabClusters, + targetSpecies=targetSpeciesByCluster)) + + } else + + + + + +######################################################################################################################################## KMEANS + + if(methMetier=="kmeans"){ + # Calculation of optimal k thanks to within variance + nbLog=nrow(datLog) + + varintra=numeric() + for (k in 2:15){ + clustersKmeans=kmeans(datLog, k, iter.max=60, nstart=10) + varintra[k]=1/nbLog*sum(clustersKmeans$withinss) + } + nbClust=which(scree(varintra)[,"epsilon"]<0)[3] + + + png(paste(analysisName,"Within variance of the classification.png",sep="_"), width = 1200, height = 800) + plot(varintra,main="Within clusters variance",xlab="Number of clusters",ylab="Within Variance") + dev.off() + + Store(objects()) + gc(reset=TRUE) + + # KMEANS with k optimal + clusters=kmeans(datLog, nbClust, 
iter.max=60, nstart=10, algorithm="Hartigan-Wong") + + + # Within and between variance of clusters and classification + centerOfGravityClassif=numeric() + withinVarClusters=numeric() + sizeClusti=numeric() + centerOfGravityDatLog=colMeans(datLog) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) + for(k in 1:nbClust){ # Within variance by cluster + + clusti=datLog[which(clusters$cluster==k),] + if(length(which(clusters$cluster==k))==1) centerOfGravityClusti=clusti + else centerOfGravityClusti=colMeans(clusti) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) + sizeClusti[k]=length(which(clusters$cluster==k)) + if(length(which(clusters$cluster==k))==1) withinVarClusters[k]=0 + else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) + + } + # Between variance + classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) + # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification + withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + + + # Compute the test-values for species + resval=test.values(clusters$cluster,datSpecies) + # Determine the target species + target=targetspecies(resval) + rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) + + + # Compute the percentage of logevents catching each species by cluster + mainSpecies=colnames(datSpecies) + percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) + for(i in 1:nbClust){ + percLogevents[i,]=round(sapply(mainSpecies,function(x) (clusters$size[i]-length(which(datSpecies[clusters$cluster==i,x]==0)))/clusters$size[i]*100),digits=1) + } + + + # Projections on the first factorial plans + 
png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") + if(dim(datLog)[2]>2) { + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") + plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") + if(dim(datLog)[2]>3) { + plot(datLog[,1], datLog[,4], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") + plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") + plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") + }} + par(op) + dev.off() + + # For a paper + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_Kmeans',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_Kmeans',sep="_"), 
type='png') + dev.off() + + X11(5,5) + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="", xlab="Axis 2", ylab="Axis 3") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_2_3_Kmeans',sep="_"), type='png') + dev.off() + + + # Catch profile of the dataset + meanprofile=colMeans(datSpecies) + png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) + op <- par(las=2) + barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") + par(op) + mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + dev.off() + + + # Catch profile by cluster + nbSpec=ncol(datSpecies) + summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) + dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") + dimnames(summaryClusters)[[2]]=names(meanprofile) + dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + if(clusters$size[i]==1){ + summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters$cluster==i),])),2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + }else{ + summaryClusters[,,i]=apply(datSpecies[which(clusters$cluster==i),],2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + } + } + # Species names for catch profile plots + nameSpPlot=character() + catchMeanThreshold=2 + for(i in 1:nbClust){ + namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) + numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) + nameSpPloti=rep("",nbSpec) + nameSpPloti[numSpi]=namSpi + nameSpPlot=rbind(nameSpPlot,nameSpPloti) + } + png(paste(analysisName,"Catch 
profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Catch profile by cluster","\n","\n",sep="")) + dev.off() + + + # For a paper : levelplot + X11(4,4) + mat <- t(summaryClusters["Mean",,]) + #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", + # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] + rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] + sp <- apply(mat,2, function(x) length(which(x>20))) + sp2=sp[which(sp>=1)] + #colnames(mat)[sp==0] <- "" + mat2=mat[,which(colnames(mat)%in%names(sp2))] + cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") + print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) + savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') + dev.off() + + + # Standard deviation profile by cluster + sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) + namSdPlot=character() + SdThreshold=2 + for(i in 1:nbClust){ + if(length(which(clusters$cluster==i))==1){ sdprofilclusti=rep(0,nbSpec) + }else{sdprofilclusti=apply(datSpecies[which(clusters$cluster==i),],2,sd)} + namSDi=names(which(sdprofilclusti>SdThreshold)) + numSDi=which(sdprofilclusti>SdThreshold) + namSdPloti=rep("",nbSpec) + namSdPloti[numSDi]=namSDi + sdprofil[i,]=sdprofilclusti + namSdPlot=rbind(namSdPlot,namSdPloti) + } + rownames(sdprofil) <- 1:nrow(sdprofil) + png(paste(analysisName,"Standard deviation 
profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) + dev.off() + + + # Number of Logevents by cluster + x=c(1:nbClust) + png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) + coord=barplot(clusters$size, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") + barplot(clusters$size, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") + text(coord,clusters$size+400,clusters$size,font=2,xpd=NA) + dev.off() + + + # Profile of test-values by cluster + targetresval=numeric() + nameTargetPlot=character() + for(i in 1:nbClust){ + nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + nameTargetPloti=rep("",nbSpec) + nameTargetPloti[numtargeti]=nomtargeti + nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) + targetresvalclusti=rep(0,nbSpec) + targetresvalclusti[numtargeti]=resval[nomtargeti,i] + targetresval=rbind(targetresval,targetresvalclusti) + } + + png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(targetresval[i,], cex.names=1, names.arg=nameTargetPlot[i,], xlab="Species", ylab="Test-value") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Profile of test-values by 
cluster","\n","\n",sep="")) + dev.off() + + + + + # Descriptive tables of the clusters + clusterDesc=matrix(0,nrow=9,ncol=nbClust) + for(i in 1:nbClust){ + clusterDesc[,i]=c(i, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, + length(which(t(summaryClusters["Median",,i])>50)), + length(which(resval[,i]>1.96)), + length(which(resval[,i]>3.29)), + length(which(apply(datSpecies,2,function (x) (clusters$size[i]-length(which(x[clusters$cluster==i]==0)))/clusters$size[i]*100)>50)), + length(which(apply(datSpecies,2,function (x) (clusters$size[i]-length(which(x[clusters$cluster==i]==0)))/clusters$size[i]*100)>90)), + clusters$size[i]) + } + rownames(clusterDesc)=c("Number of species", + "to have 50% of catch", "to have 90% of catch", + "with a median higher than 50", + "with a test-value > 1.96", "with a test-value > 3.29", + "catch in 50% of the logevents", "catch in 90% of the logevents", + "Clusters size") + colnames(clusterDesc)=1:nbClust + clusterDesc2=as.data.frame(clusterDesc) + + + # Summary tables of the clusters + namesSpecies=matrix(NA,nrow=nbClust,ncol=10) + namesCapt=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) + namesTarget=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) + tabLibname=matrix(NA,nrow=nbClust,ncol=10) + listLibname=list() + + for(i in 1:nbClust){ + namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] + a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) + colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] + if(length(a)!=0){ + 
namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) + } + namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) + } + + for(i in 1:nbClust){ + listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) + tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) + } + + tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) + tabTestVal=matrix(NA,nrow=nbClust,ncol=10) + tabPropLog=matrix(NA,nrow=nbClust,ncol=10) + + for(i in 1:nbClust){ + print("-----------------------------------------------------------------") + print(paste("Cluster",i)) + propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabPropCatch[i,1:length(propCatch)]=propCatch + print(propCatch) + testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabTestVal[i,1:length(testVal)]=testVal + print(testVal) + propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$size[i]-length(which(datSpecies[clusters$cluster==i,x]==0)))/clusters$size[i]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$size[i]-length(which(datSpecies[clusters$cluster==i,x]==0)))/clusters$size[i]*100),digits=1)>=0.1)] + tabPropLog[i,1:length(propLog)]=propLog + print(propLog) + } + + tabClusters=array(0,dim=c(10,5,nbClust)) + dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") 
+ dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) + } + + sizeTabClusters=numeric() + for(i in 1:nbClust){ + sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) + } + + + # Target Species + # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch + # - Test-value > thresholdTestValue + # - % Logevents > thresholdLogevents + thresholdCatch=75 + thresholdTestValue=3 + thresholdLogevents=30 + + sppCumCatch=list() + sppTestValue=list() + sppLogevents=list() + targetSpeciesByCluster=list() + + for (i in 1:nbClust){ + percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) + nbSpSel=length(which(percCatchCumthresholdTestValue),"FAO",i] + + sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] + + targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) + targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) + } + + # List of metiers (level 7) + listMetiersL7=list() + for (i in 1:nbClust){ + metiersClusteri=targetSpeciesByCluster[[i]] + metiersClusteri=as.character(unique(unlist(metiersClusteri))) + metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") + listMetiersL7[[i]]=metiersClusteri + } + + # Metier (level 7) of each logevent + metierByLogeventL7=unlist(sapply(clusters$cluster,function(x) listMetiersL7[[x]])) + + + + # Create csv tables + write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") + + dfClust=data.frame() + dfClust=paste("Clust ",1:nbClust,sep="") + for(i in 1:nbClust){ + write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + 
tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) + write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + } + + + LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) + print(" --- end of step 3 ---") + print(Sys.time()-t1) + + return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, + betweenVarClassifOnTot=betweenVarClassifOnTot, nbClust=nbClust, + summaryClusters=summaryClusters, testValues=resval, + testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, + descClusters=clusterDesc2, tabClusters=tabClusters, + targetSpecies=targetSpeciesByCluster)) + + } else + + + + + +######################################################################################################################################## PAM + + if(methMetier=="pam"){ + + # Calculation of optimal k thanks to the silhouette (second maximum) + nbLog=nrow(datLog) + + clustersPam.silcoeff=numeric() + clustersPam.silcoeff[1]=0 + clustersPam.silcoeff[2]=0 + clustersPam.silcoeff[3]=0 + + k=2 + compMax=1 + repeat{ + k=k+2 + print(k) + clustersPam=pam(datLog,k) + clustersPam.silcoeff[k]=clustersPam$silinfo$avg.width + clustersPam=pam(datLog,k+1) + clustersPam.silcoeff[k+1]=clustersPam$silinfo$avg.width + if((clustersPam.silcoeff[k-2]clustersPam.silcoeff[k]) & compMax<=2){ + if(compMax==2){ + nbClust=k-1 + print(paste("2e max =",k-1)) + print(paste("nbClust =",nbClust)) + break + } else { + compMax=compMax+1 + print(paste("compMax1 =",compMax)) + print(paste("1er max =",k-1)) + } + } + if((clustersPam.silcoeff[k-1]clustersPam.silcoeff[k+1]) & compMax<=2){ + if(compMax==2){ + nbClust=k + print(paste("2e max =",k)) + print(paste("nbClust =",nbClust)) + break + } else { + compMax=compMax+1 + print(paste("compMax2 =",compMax)) + print(paste("1er max =",k)) + } + } + Store(objects()) + gc(reset=TRUE) + } + + png(paste(analysisName,"Silhouette of the classification.png",sep="_"), width = 1200, height = 800) + plot(clustersPam.silcoeff, 
main="Silhouette of the classification", xlab="Number of clusters", ylab="Silhouette") # k optimal corresponds to second maximum of silhouette's coefficients + dev.off() + + Store(objects()) + gc(reset=TRUE) + + cat("PamSilCoeff",clustersPam.silcoeff,"\n") + + # PAM with optimal k + clusters=pam(datLog,nbClust) + summary(clusters) + + + # Within and between variance of clusters and classification + centerOfGravityClassif=numeric() + withinVarClusters=numeric() + sizeClusti=numeric() + centerOfGravityDatLog=colMeans(datLog) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) + for(k in 1:nbClust){ # Within variance by cluster + + clusti=datLog[which(clusters$clustering==k),] + if(length(which(clusters$clustering==k))==1) centerOfGravityClusti=clusti + else centerOfGravityClusti=colMeans(clusti) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) + sizeClusti[k]=length(which(clusters$clustering==k)) + if(length(which(clusters$clustering==k))==1) withinVarClusters[k]=0 + else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) + + } + # Between variance + classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) + # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification + withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + + + # Compute the test-values for species + resval=test.values(clusters$clustering,datSpecies) + # Determine the target species + target=targetspecies(resval) + nbClust=length(clusters$id.med) + rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) + + + # Compute the percentage of logevents catching each species by cluster + mainSpecies=colnames(datSpecies) + 
percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) + for(i in 1:nbClust){ + percLogevents[i,]=round(sapply(mainSpecies,function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1) + } + + + # Projections on the first factorial plans + png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") + if(dim(datLog)[2]>2) { + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") + plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") + if(dim(datLog)[2]>3) { + plot(datLog[,1], datLog[,4], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") + plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") + plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") + }} + par(op) + dev.off() + + + # For a paper + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; 
abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_PAM',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_PAM',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 2", ylab="Axis 3") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_2_3_PAM',sep="_"), type='png') + dev.off() + + + # Catch profile of the dataset + meanprofile=colMeans(datSpecies) + png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) + op <- par(las=2) + barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") + par(op) + mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + dev.off() + + + # Catch profile by cluster + nbSpec=ncol(datSpecies) + summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) + dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") + dimnames(summaryClusters)[[2]]=names(meanprofile) + dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + if(clusters$clusinfo[i,1]==1){ + summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters$clustering==i),])),2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + }else{ + summaryClusters[,,i]=apply(datSpecies[which(clusters$clustering==i),],2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + } + } + 
# Species names for catch profile plots + nameSpPlot=character() + catchMeanThreshold=2 + for(i in 1:nbClust){ + namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) + numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) + nameSpPloti=rep("",nbSpec) + nameSpPloti[numSpi]=namSpi + nameSpPlot=rbind(nameSpPlot,nameSpPloti) + } + png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Catch profile by cluster","\n","\n",sep="")) + dev.off() + + + # For a paper : levelplot + X11(4,4) + mat <- t(summaryClusters["Mean",,]) + #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", + # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] + rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] + sp <- apply(mat,2, function(x) length(which(x>20))) + sp2=sp[which(sp>=1)] + #colnames(mat)[sp==0] <- "" + mat2=mat[,which(colnames(mat)%in%names(sp2))] + cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") + print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) + savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') + dev.off() + + + # Standard deviation profile by cluster + sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) + namSdPlot=character() + SdThreshold=2 + for(i in 1:nbClust){ + if(length(which(clusters$clustering==i))==1){ 
sdprofilclusti=rep(0,nbSpec) + }else{sdprofilclusti=apply(datSpecies[which(clusters$clustering==i),],2,sd)} + namSDi=names(which(sdprofilclusti>SdThreshold)) + numSDi=which(sdprofilclusti>SdThreshold) + namSdPloti=rep("",nbSpec) + namSdPloti[numSDi]=namSDi + sdprofil[i,]=sdprofilclusti + namSdPlot=rbind(namSdPlot,namSdPloti) + } + rownames(sdprofil) <- 1:nrow(sdprofil) + png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) + dev.off() + + + # Number of Logevents by cluster + x=c(1:nbClust) + png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) + coord=barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") + barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") + text(coord,clusters$clusinfo[,1]+200,clusters$clusinfo[,1],font=2,xpd=NA) + dev.off() + + + # Profile of test-values by cluster + targetresval=numeric() + nameTargetPlot=character() + for(i in 1:nbClust){ + nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + nameTargetPloti=rep("",nbSpec) + nameTargetPloti[numtargeti]=nomtargeti + nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) + targetresvalclusti=rep(0,nbSpec) + targetresvalclusti[numtargeti]=resval[nomtargeti,i] + targetresval=rbind(targetresval,targetresvalclusti) + } + + 
png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(targetresval[i,],names.arg=nameTargetPlot[i,], xlab="Species", ylab="Test-value") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) + dev.off() + + + + # Descriptive tables of the clusters + clusterDesc=matrix(0,nrow=9,ncol=nbClust) + for(i in 1:nbClust){ + clusterDesc[,i]=c(i, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, + length(which(t(summaryClusters["Median",,i])>50)), + length(which(resval[,i]>1.96)), + length(which(resval[,i]>3.29)), + length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>50)), + length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>90)), + clusters$clusinfo[i,1]) + } + rownames(clusterDesc)=c("Number of species", + "to have 50% of catch", "to have 90% of catch", + "with a median higher than 50", + "with a test-value > 1.96", "with a test-value > 3.29", + "catch in 50% of the logevents", "catch in 90% of the logevents", + "Clusters size") + colnames(clusterDesc)=1:nbClust + clusterDesc2=as.data.frame(clusterDesc) + + + # Summary tables of the clusters + namesSpecies=matrix(NA,nrow=nbClust,ncol=10) + namesCapt=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) + namesTarget=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) + 
tabLibname=matrix(NA,nrow=nbClust,ncol=10) + listLibname=list() + + for(i in 1:nbClust){ + namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] + a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) + colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] + if(length(a)!=0){ + namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) + } + namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) + } + + for(i in 1:nbClust){ + listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) + tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) + } + + tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) + tabTestVal=matrix(NA,nrow=nbClust,ncol=10) + tabPropLog=matrix(NA,nrow=nbClust,ncol=10) + + for(i in 1:nbClust){ + print("-----------------------------------------------------------------") + print(paste("Cluster",i)) + propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabPropCatch[i,1:length(propCatch)]=propCatch + print(propCatch) + testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabTestVal[i,1:length(testVal)]=testVal + print(testVal) + propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) 
(clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)>=0.1)] + tabPropLog[i,1:length(propLog)]=propLog + print(propLog) + } + + tabClusters=array(0,dim=c(10,5,nbClust)) + dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") + dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) + } + + sizeTabClusters=numeric() + for(i in 1:nbClust){ + sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) + } + + + # Target Species + # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch + # - Test-value > thresholdTestValue + # - % Logevents > thresholdLogevents + thresholdCatch=75 + thresholdTestValue=3 + thresholdLogevents=30 + + sppCumCatch=list() + sppTestValue=list() + sppLogevents=list() + targetSpeciesByCluster=list() + + for (i in 1:nbClust){ + percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) + nbSpSel=length(which(percCatchCumthresholdTestValue),"FAO",i] + + sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] + + targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) + targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) + } + + # List of metiers (level 7) + listMetiersL7=list() + for (i in 1:nbClust){ + metiersClusteri=targetSpeciesByCluster[[i]] + metiersClusteri=as.character(unique(unlist(metiersClusteri))) + 
metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") + listMetiersL7[[i]]=metiersClusteri + } + + # Metier (level 7) of each logevent + metierByLogeventL7=unlist(sapply(clusters$clustering,function(x) listMetiersL7[[x]])) + + + # Create csv tables + write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") + + dfClust=data.frame() + dfClust=paste("Clust ",1:nbClust,sep="") + for(i in 1:nbClust){ + write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) + write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + } + + + LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) + print(" --- end of step 3 ---") + print(Sys.time()-t1) + + return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, + betweenVarClassifOnTot=betweenVarClassifOnTot, nbClust=nbClust, + summaryClusters=summaryClusters, testValues=resval, + testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, + descClusters=clusterDesc2, tabClusters=tabClusters, + targetSpecies=targetSpeciesByCluster)) + + } else + + + + + +######################################################################################################################################## CLARA + + if(methMetier=="clara"){ + nbLog=nrow(datLog) + propSample=0.1 + + # Calculation of optimal k thanks to the silhouette (second maximum) + clustersClara.silcoeff=numeric() + clustersClara.silcoeff[1]=0 + clustersClara.silcoeff[2]=0 + clustersClara.silcoeff[3]=0 + k=2 + compMax=1 + repeat{ + k=k+2 + print(k) + clustersClara=clara(datLog, k, metric=param1, stand=FALSE, samples=5, sampsize=min(nbLog,round(propSample*nbLog+10*k))) + clustersClara.silcoeff[k]=clustersClara$silinfo$avg.width + clustersClara=clara(datLog, k+1, metric=param1, stand=FALSE, samples=5, sampsize=min(nbLog,round(propSample*nbLog+10*(k+1)))) + clustersClara.silcoeff[k+1]=clustersClara$silinfo$avg.width + 
if((clustersClara.silcoeff[k-2]clustersClara.silcoeff[k]) & compMax<=2){ + if(compMax==2){ + nbClust=k-1 + print(paste("2e max =",k-1)) + print(paste("nbClust =",nbClust)) + break + } else { + compMax=compMax+1 + print(paste("compMax1 =",compMax)) + print(paste("1er max =",k-1)) + } + } + if((clustersClara.silcoeff[k-1]clustersClara.silcoeff[k+1]) & compMax<=2){ + if(compMax==2){ + nbClust=k + print(paste("2e max =",k)) + print(paste("nbClust =",nbClust)) + break + } else { + compMax=compMax+1 + print(paste("compMax2 =",compMax)) + print(paste("1er max =",k)) + } + } + Store(objects()) + gc(reset=TRUE) + } + + + png(paste(analysisName,"Silhouette of the classification.png",sep="_"), width = 1200, height = 800) + plot(clustersClara.silcoeff, main="Silhouette of the classification", xlab="Number of clusters", ylab="Silhouette") # k optimal corresponds to maximum of silhouette's coefficients + dev.off() + + Store(objects()) + gc(reset=TRUE) + + cat("ClaraSilCoeff",clustersClara.silcoeff,"\n") + + + # CLARA with optimal k + clusters=clara(datLog, nbClust, metric=param1, stand=FALSE, samples=5, sampsize=min(nbLog,round(propSample*nbLog+10*nbClust))) # CLARA with optimal k + summary(clusters) + + + # Within and between variance of clusters and classification + centerOfGravityClassif=numeric() + withinVarClusters=numeric() + sizeClusti=numeric() + centerOfGravityDatLog=colMeans(datLog) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) + for(k in 1:nbClust){ # Within variance by cluster + + clusti=datLog[which(clusters$clustering==k),] + if(length(which(clusters$clustering==k))==1) centerOfGravityClusti=clusti + else centerOfGravityClusti=colMeans(clusti) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) + sizeClusti[k]=length(which(clusters$clustering==k)) + if(length(which(clusters$clustering==k))==1) withinVarClusters[k]=0 + else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) 
+ + } + # Between variance + classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) + # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification + withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + + + # Compute the test-values for species + resval=test.values(clusters$cluster,datSpecies) + # Determine the target species + target=targetspecies(resval) + rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) + + + # Compute the percentage of logevents catching each species by cluster + mainSpecies=colnames(datSpecies) + percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) + for(i in 1:nbClust){ + percLogevents[i,]=round(sapply(mainSpecies,function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1) + } + + + # Projections on the first factorial plans + png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") + if(dim(datLog)[2]>2) { + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") + plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") + if(dim(datLog)[2]>3) { + plot(datLog[,1], datLog[,4], pch=21, 
bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") + plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") + plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") + }} + par(op) + dev.off() + + + # For a paper + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_CLARA',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_CLARA',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 2", ylab="Axis 3") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_2_3_CLARA',sep="_"), type='png') + dev.off() + + # Catch profile of the dataset + meanprofile=colMeans(datSpecies) + png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) + op <- par(las=2) + barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") + par(op) + mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + dev.off() + + + # Catch profile by cluster + nbSpec=ncol(datSpecies) + 
summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) + dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") + dimnames(summaryClusters)[[2]]=names(meanprofile) + dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + if(clusters$clusinfo[i,1]==1){ + summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters$clustering==i),])),2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + }else{ + summaryClusters[,,i]=apply(datSpecies[which(clusters$clustering==i),],2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + } + } + # Species names for catch profile plots + nameSpPlot=character() + catchMeanThreshold=2 + for(i in 1:nbClust){ + namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) + numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) + nameSpPloti=rep("",nbSpec) + nameSpPloti[numSpi]=namSpi + nameSpPlot=rbind(nameSpPlot,nameSpPloti) + } + # Plot + png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Catch profile by cluster","\n","\n",sep="")) + dev.off() + + + # For a paper : levelplot + X11(4,4) + mat <- t(summaryClusters["Mean",,]) + #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", + # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] + rownames(mat) <- 
c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] + sp <- apply(mat,2, function(x) length(which(x>20))) + sp2=sp[which(sp>=1)] + #colnames(mat)[sp==0] <- "" + mat2=mat[,which(colnames(mat)%in%names(sp2))] + cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") + print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) + savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') + dev.off() + + # OR # + mat <- t(summaryClusters["Mean",,]) + #mat=mat[,order(colnames(mat),decreasing=TRUE)] # si on veut mettre les especes par ordre descendant + rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] + sp <- apply(mat,2, function(x) length(which(x>20))) + colnames(mat)[sp==0] <- "" + cc <- colorRampPalette(c("white", "steelblue2", "blue4"),space = "rgb", interpolate="spline") + png(filename = paste(paste(analysisName,'mean_profile_by_cluster_levelplot_blue',sep="_"),".png",sep=""), width = 400, height = 800) + print(levelplot(mat, cuts=4, aspect=3, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.8), colorkey=list(space="right", at=c(0,20,40,60,80,100)))) + dev.off() + + + # Standard deviation profile by cluster + sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) + namSdPlot=character() + SdThreshold=5 + for(i in 1:nbClust){ + if(length(which(clusters$clustering==i))==1){ sdprofilclusti=rep(0,nbSpec) + }else{sdprofilclusti=apply(datSpecies[which(clusters$clustering==i),],2,sd)} + namSDi=names(which(sdprofilclusti>SdThreshold)) + numSDi=which(sdprofilclusti>SdThreshold) + namSdPloti=rep("",nbSpec) + namSdPloti[numSDi]=namSDi + sdprofil[i,]=sdprofilclusti + namSdPlot=rbind(namSdPlot,namSdPloti) + } + rownames(sdprofil) <- 
1:nrow(sdprofil) + png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) + dev.off() + + + # Number of Logevents by cluster + x=c(1:nbClust) + png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) + coord=barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") + barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") + text(coord,clusters$clusinfo[,1]+5,clusters$clusinfo[,1],font=2,xpd=NA) + dev.off() + + + # Profile of test-values by cluster + targetresval=matrix(0,nrow=nbClust,ncol=nbSpec) + colnames(targetresval)=colnames(datSpecies) + rownames(targetresval)=1:nbClust + nameTargetPlot=matrix(NA,nrow=nbClust,ncol=nbSpec) + for(i in 1:nbClust){ + nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + nameTargetPlot[i,numtargeti]=nomtargeti + targetresval[i,numtargeti]=resval[nomtargeti,i] + } + + png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(targetresval[i,],names.arg=nameTargetPlot[i,], xlab="Species", ylab="Test-value") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + 
title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) + dev.off() + + + + # Descriptive tables of the clusters + clusterDesc=matrix(0,nrow=9,ncol=nbClust) + for(i in 1:nbClust){ + clusterDesc[,i]=c(i, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, + length(which(t(summaryClusters["Median",,i])>50)), + length(which(resval[,i]>1.96)), + length(which(resval[,i]>3.29)), + length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>50)), + length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>90)), + clusters$clusinfo[i,1]) + } + rownames(clusterDesc)=c("Number of species", + "to have 50% of catch", "to have 90% of catch", + "with a median higher than 50", + "with a test-value > 1.96", "with a test-value > 3.29", + "catch in 50% of the logevents", "catch in 90% of the logevents", + "Clusters size") + colnames(clusterDesc)=1:nbClust + clusterDesc2=as.data.frame(clusterDesc) + + + # Summary tables of the clusters + namesSpecies=matrix(NA,nrow=nbClust,ncol=10) + namesCapt=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) + namesTarget=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) + tabLibname=matrix(NA,nrow=nbClust,ncol=10) + listLibname=list() + + for(i in 1:nbClust){ + namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] + a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) + colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] + 
if(length(a)!=0){ + namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) + } + namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) + } + + for(i in 1:nbClust){ + listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) + tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) + } + + tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) + tabTestVal=matrix(NA,nrow=nbClust,ncol=10) + tabPropLog=matrix(NA,nrow=nbClust,ncol=10) + + for(i in 1:nbClust){ + print("-----------------------------------------------------------------") + print(paste("Cluster",i)) + propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabPropCatch[i,1:length(propCatch)]=propCatch + print(propCatch) + testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabTestVal[i,1:length(testVal)]=testVal + print(testVal) + propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)>=0.1)] + tabPropLog[i,1:length(propLog)]=propLog + print(propLog) + } + + tabClusters=array(0,dim=c(10,5,nbClust)) + 
dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") + dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) + } + + sizeTabClusters=numeric() + for(i in 1:nbClust){ + sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) + } + + + # Target Species + # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch + # - Test-value > thresholdTestValue + # - % Logevents > thresholdLogevents + thresholdCatch=75 + thresholdTestValue=3 + thresholdLogevents=30 + + sppCumCatch=list() + sppTestValue=list() + sppLogevents=list() + targetSpeciesByCluster=list() + + for (i in 1:nbClust){ + percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) + nbSpSel=length(which(percCatchCumthresholdTestValue),"FAO",i] + + sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] + + targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) + targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) + } + + # List of metiers (level 7) + listMetiersL7=list() + for (i in 1:nbClust){ + metiersClusteri=targetSpeciesByCluster[[i]] + metiersClusteri=as.character(unique(unlist(metiersClusteri))) + metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") + listMetiersL7[[i]]=metiersClusteri + } + + # Metier (level 7) of each logevent + metierByLogeventL7=unlist(sapply(clusters$clustering,function(x) listMetiersL7[[x]])) + + + + # Create csv tables + write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") + + dfClust=data.frame() + dfClust=paste("Clust ",1:nbClust,sep="") + for(i in 1:nbClust){ + 
write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) + write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + } + + + LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) + print(" --- end of step 3 ---") + print(Sys.time()-t1) + + return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, + betweenVarClassifOnTot=betweenVarClassifOnTot, nbClust=nbClust, + summaryClusters=summaryClusters, testValues=resval, + testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, + descClusters=clusterDesc2, tabClusters=tabClusters, + targetSpecies=targetSpeciesByCluster)) + + } else stop("methMetier must be hac, kmeans, pam or clara") + # end of the methods + + +} # end of the function "getMetierClusters" + diff --git a/vmstools/R/getTableAfterPCA.r b/vmstools/R/getTableAfterPCA.r index 16ad140..1be5a3f 100644 --- a/vmstools/R/getTableAfterPCA.r +++ b/vmstools/R/getTableAfterPCA.r @@ -1,129 +1,222 @@ -################################################################################ -# STEP 2 OF THE MULTIVARIATE CLASSIFICATION : # -# RUN A PCA ON THE DATASET FROM STEP 1 # -# (2 CRITERIA : 70PERCENTS AND SCREETEST ARE AVAILABLE) # -# IT'S POSSIBLE TO KEEP THE DATASET FROM STEP 1 BY CHOOSING "NOPCA" # -################################################################################ - - -getTableAfterPCA = function(datSpecies,analysisName="",pcaYesNo="pca",criterion="70percents"){ - - LE_ID <- rownames(datSpecies) - NbSpecies <- dim(datSpecies)[2] - datSpecies <- as.matrix(datSpecies,ncol=NbSpecies,nrow=length(LE_ID)) - - print("######## STEP 2 PCA/NO PCA ON CATCH PROFILES ########") - - t1 <- Sys.time() - print(paste("--- Selected method :",pcaYesNo, "---")) - - - if(pcaYesNo=="pca"){ - - print("Running PCA on all axes...") - - # PCA (Principal Component Analysis) - log.pca = PCA(datSpecies, graph=FALSE, ncp=ncol(datSpecies)) - - X11(5,5) - 
plot.PCA(log.pca, choix = "var", axes = 1:2, new.plot=FALSE, title="", lim.cos2.var = 0.1) - savePlot(filename=paste(analysisName,'species_projection_on_the_1_and_2_factorial_axis',sep="_"), type='png') - dev.off() - - X11(5,5) - plot.PCA(log.pca, choix = "var", axes = 1:2, new.plot=FALSE, title="", lim.cos2.var = 0.1) - savePlot(filename=paste(analysisName,'species_projection_on_the_1_and_2_factorial_axis',sep="_"), type='png') - dev.off() - - X11(5,5) - plot.PCA(log.pca, choix = "var", axes = 2:3, new.plot=FALSE, title="", lim.cos2.var = 0.1) - savePlot(filename=paste(analysisName,'species_projection_on_the_2_and_3_factorial_axis',sep="_"), type='png') - dev.off() - - X11(5,5) - plot.PCA(log.pca, choix = "ind", axes = 1:2, habillage = "ind", title="", new.plot=FALSE, cex=1.1) - savePlot(filename=paste(analysisName,'projection_of_individuals_on_the_first_two_factorial_axis',sep="_"), type='png') - dev.off() - - - # Determine the number of axis to keep - if(criterion=="70percents"){ - nbaxes=which(log.pca$eig[,3]>70)[1] # we are taking the axis until having 70% of total inertia - cat("--- number of axes:",nbaxes,"\n") - cat("--- percentage inertia explained:",log.pca$eig[nbaxes,3],"\n") - } else - # OR - if(criterion=="screetest"){ - nbaxes=which(scree(log.pca$eig[,1])[,"epsilon"]<0)[2] # thanks to the scree-test - cat("--- number of axes:",nbaxes,"\n") - cat("--- percentage inertia explained:",log.pca$eig[nbaxes,3],"\n") - } else stop("Criterion for PCA must be 70percents or screetest") - - - # Eigenvalues and relative graphics - log.pca$eig - - png(paste(analysisName,"Eigen values.png",sep="_"), width = 1200, height = 800) - x=1:length(log.pca$eig[,1]) - barplot(log.pca$eig[,1],names.arg=x, main="Eigen values") - dev.off() - - png(paste(analysisName,"Percentage of Inertia.png",sep="_"), width = 1200, height = 800) - color=rep("grey",length(log.pca$eig[,1])) - if(criterion=="screetest") color[1:nbaxes]="green" - barplot(log.pca$eig[,2],names.arg=x, col=color, 
main="Percentage of Inertia of factorial axis", xlab="Axis", ylab="% of Inertia") - dev.off() - - X11(5,5) - op <- par(no.readonly = TRUE) - par(mar=c(4,4,1,1)) - color=rep("grey",length(log.pca$eig[,1])) - if(criterion=="70percents") color[1:nbaxes]="green" - barplot(log.pca$eig[,3], col=color, ylab="", xlab="", axes=FALSE, cex.names=2) - axis(2, las=2) - abline(h=70, col="red") - text(1,72, "70% of Inertia", col = "red", adj = c(0, -.1)) - mtext("Axes", side=1, adj=0.5, outer=FALSE, line=+1, font=1, cex=1.5) - mtext("% of Inertia", side=2, adj=0.5, outer=FALSE, line=+2.5, font=1, cex=1.5) - savePlot(filename = paste(analysisName,"Cumulative Percentage of Inertia.png",sep="_"),type ="png") - par(op) - dev.off() - - - # Projection of variables "species" on the first factorial axis - png(paste(analysisName,"Projection of Species on first factorial axis.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(log.pca,choix="var",axes = c(1, 2),new.plot=FALSE,lim.cos2.var = 0.3) - plot(log.pca,choix="var",axes = c(2, 3),new.plot=FALSE,lim.cos2.var = 0.3) - plot(log.pca,choix="var",axes = c(1, 3),new.plot=FALSE,lim.cos2.var = 0.3) - plot(log.pca,choix="var",axes = c(1, 4),new.plot=FALSE,lim.cos2.var = 0.3) - plot(log.pca,choix="var",axes = c(2, 4),new.plot=FALSE,lim.cos2.var = 0.3) - #plot(log.pca,choix="var",axes = c(3, 4),new.plot=FALSE,lim.cos2.var = 0.3) - par(op) - title(main=paste("Projection of Species on first factorial axis","\n","\n",sep="")) - dev.off() - - - # PCA with the good number of axis - print("Retaining Principal Components of selected axes...") - log.pca=log.pca$ind$coord[,1:nbaxes] - datLog=round(log.pca,4) - - - } else - - - if(pcaYesNo=="nopca"){ - datLog=datSpecies - } else stop("pcaYesNo must be pca or nopca") - - Store(objects()) - gc(reset=TRUE) - - print(" --- end of step 2 ---") - print(Sys.time()-t1) - - return(datLog) - -} - +################################################################################ +# STEP 2 OF 
THE MULTIVARIATE CLASSIFICATION : # +# RUN A PCA ON THE DATASET FROM STEP 1 # +# (2 CRITERIA : 70PERCENTS AND SCREETEST ARE AVAILABLE) # +# IT'S POSSIBLE TO KEEP THE DATASET FROM STEP 1 BY CHOOSING "NOPCA" # +################################################################################ + + + + +#' Finding Metiers from a reduced EFLALO dataset, step 2: Options for running a +#' PCA on the selected species. +#' +#' This function represents the second step in the multivariate analysis of +#' logbooks data for identifying metiers. +#' +#' This step of the classification is a user-driven choice of running or not a +#' Principal Component Analysis (PCA) on the catch per logevents of the species +#' retained, expressed in percentage. A PCA will help reducing the +#' multi-dimensional catch matrix to a smaller number of informative components +#' represented by the first n- axes of the PCA transformation. The number of +#' axes can be chosen either by using a scree test (criterion 'screetest') +#' looking for the significant marginal increases of explained inertia, or by +#' selecting all axes cumulating 70 percent of explained inertia (criterion +#' '70percents'). +#' +#' +#' @param datSpecies numerical matrix with Logevents as lines and species as +#' columns, with percentage values (between 0 and 100) of each species in the +#' logevent catches. Logevent ID (LE_ID) should be as row names. Typically, +#' this table will be produced from a eflalo dataset using the function +#' extractTableMainSpecies() +#' @param analysisName character, the name of the run. Used for the file name +#' of the plots. +#' @param pcaYesNo character. An indication of whether a PCA should be +#' performed. Use "pca" if a PCA should be run, and "nopca" if it shouldn't. +#' @param criterion character. Criterion used for selected the number of +#' principal components (axes) retained. 
The number of axes can be chosen +#' either by using a scree test (criterion 'screetest'), looking for the +#' second-order unsignificant marginal increases of explained inertia, or by +#' selecting all axes cumulating up to 70 percent of explained inertia +#' (criterion '70percents'). If pcaYesNo="nopca", then this criterion should be +#' set as NULL. Experience has shown that the '70percents' criterion may often +#' select a larger number axes than the scree test criterion, and may therefore +#' be more appropriate for analysing large and heterogeneous datasets, by +#' retaining more information for the subsequent clustering. +#' @return If a PCA is run, a number of graphs are produced and directly saved +#' in the working directory. These graphs describe 1) species projection on the +#' first factorial axes, 2) individuals (logevents) projections on the first +#' factorial axes, 3) actual and cumulative percentage of inertia of factorial +#' axes, and 4) eigenvalues. +#' +#' If a PCA is run, the function returns a numerical matrix with Logevents as +#' lines and selected Principal Components as columns. If no PCA is run, the +#' function returns the same matrix as the input, with percentage values by +#' species. +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @seealso selectMainSpecies(), extractTableMainSpecies() +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @examples +#' +#' +#' \dontrun{ +#' +#' data(eflalo) +#' +#' eflalo <- formatEflalo(eflalo) +#' +#' eflalo <- eflalo[eflalo$LE_GEAR=="OTB",] +#' # note that output plots will be sent to getwd() +#' analysisName <- "metier_analysis_OTB" +#' +#' dat <- eflalo[,c("LE_ID",grep("EURO",colnames(eflalo),value=TRUE))] +#' names(dat)[-1] <- unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) +#' +#' explo <- selectMainSpecies(dat, analysisName, RunHAC=TRUE, DiagFlag=FALSE) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' Step1 <- extractTableMainSpecies(dat, explo$NamesMainSpeciesHAC, +#' paramTotal=95, paramLogevent=100) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' rowNamesSave <- row.names(Step1) +#' row.names(Step1) <- 1:nrow(Step1) +#' +#' # Run a PCA +#' Step2 <- getTableAfterPCA(Step1, analysisName, pcaYesNo="pca", +#' criterion="70percents") +#' +#' row.names(Step1) <- rowNamesSave +#' row.names(Step2) <- rowNamesSave +#' +#' } +#' +#' +#' @export getTableAfterPCA +getTableAfterPCA = function(datSpecies,analysisName="",pcaYesNo="pca",criterion="70percents"){ + + LE_ID <- rownames(datSpecies) + NbSpecies <- dim(datSpecies)[2] + datSpecies <- as.matrix(datSpecies,ncol=NbSpecies,nrow=length(LE_ID)) + + print("######## STEP 2 PCA/NO PCA ON CATCH PROFILES ########") + + t1 <- Sys.time() + print(paste("--- Selected method :",pcaYesNo, "---")) + + + if(pcaYesNo=="pca"){ + + print("Running PCA on all axes...") + + # PCA (Principal Component Analysis) + log.pca = PCA(datSpecies, graph=FALSE, ncp=ncol(datSpecies)) + + X11(5,5) + plot.PCA(log.pca, choix = "var", axes = 1:2, new.plot=FALSE, title="", lim.cos2.var = 0.1) + savePlot(filename=paste(analysisName,'species_projection_on_the_1_and_2_factorial_axis',sep="_"), type='png') + dev.off() + + X11(5,5) + plot.PCA(log.pca, choix = "var", axes = 1:2, new.plot=FALSE, title="", lim.cos2.var = 0.1) + 
savePlot(filename=paste(analysisName,'species_projection_on_the_1_and_2_factorial_axis',sep="_"), type='png') + dev.off() + + X11(5,5) + plot.PCA(log.pca, choix = "var", axes = 2:3, new.plot=FALSE, title="", lim.cos2.var = 0.1) + savePlot(filename=paste(analysisName,'species_projection_on_the_2_and_3_factorial_axis',sep="_"), type='png') + dev.off() + + X11(5,5) + plot.PCA(log.pca, choix = "ind", axes = 1:2, habillage = "ind", title="", new.plot=FALSE, cex=1.1) + savePlot(filename=paste(analysisName,'projection_of_individuals_on_the_first_two_factorial_axis',sep="_"), type='png') + dev.off() + + + # Determine the number of axis to keep + if(criterion=="70percents"){ + nbaxes=which(log.pca$eig[,3]>70)[1] # we are taking the axis until having 70% of total inertia + cat("--- number of axes:",nbaxes,"\n") + cat("--- percentage inertia explained:",log.pca$eig[nbaxes,3],"\n") + } else + # OR + if(criterion=="screetest"){ + nbaxes=which(scree(log.pca$eig[,1])[,"epsilon"]<0)[2] # thanks to the scree-test + cat("--- number of axes:",nbaxes,"\n") + cat("--- percentage inertia explained:",log.pca$eig[nbaxes,3],"\n") + } else stop("Criterion for PCA must be 70percents or screetest") + + + # Eigenvalues and relative graphics + log.pca$eig + + png(paste(analysisName,"Eigen values.png",sep="_"), width = 1200, height = 800) + x=1:length(log.pca$eig[,1]) + barplot(log.pca$eig[,1],names.arg=x, main="Eigen values") + dev.off() + + png(paste(analysisName,"Percentage of Inertia.png",sep="_"), width = 1200, height = 800) + color=rep("grey",length(log.pca$eig[,1])) + if(criterion=="screetest") color[1:nbaxes]="green" + barplot(log.pca$eig[,2],names.arg=x, col=color, main="Percentage of Inertia of factorial axis", xlab="Axis", ylab="% of Inertia") + dev.off() + + X11(5,5) + op <- par(no.readonly = TRUE) + par(mar=c(4,4,1,1)) + color=rep("grey",length(log.pca$eig[,1])) + if(criterion=="70percents") color[1:nbaxes]="green" + barplot(log.pca$eig[,3], col=color, ylab="", xlab="", axes=FALSE, 
cex.names=2) + axis(2, las=2) + abline(h=70, col="red") + text(1,72, "70% of Inertia", col = "red", adj = c(0, -.1)) + mtext("Axes", side=1, adj=0.5, outer=FALSE, line=+1, font=1, cex=1.5) + mtext("% of Inertia", side=2, adj=0.5, outer=FALSE, line=+2.5, font=1, cex=1.5) + savePlot(filename = paste(analysisName,"Cumulative Percentage of Inertia.png",sep="_"),type ="png") + par(op) + dev.off() + + + # Projection of variables "species" on the first factorial axis + png(paste(analysisName,"Projection of Species on first factorial axis.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(log.pca,choix="var",axes = c(1, 2),new.plot=FALSE,lim.cos2.var = 0.3) + plot(log.pca,choix="var",axes = c(2, 3),new.plot=FALSE,lim.cos2.var = 0.3) + plot(log.pca,choix="var",axes = c(1, 3),new.plot=FALSE,lim.cos2.var = 0.3) + plot(log.pca,choix="var",axes = c(1, 4),new.plot=FALSE,lim.cos2.var = 0.3) + plot(log.pca,choix="var",axes = c(2, 4),new.plot=FALSE,lim.cos2.var = 0.3) + #plot(log.pca,choix="var",axes = c(3, 4),new.plot=FALSE,lim.cos2.var = 0.3) + par(op) + title(main=paste("Projection of Species on first factorial axis","\n","\n",sep="")) + dev.off() + + + # PCA with the good number of axis + print("Retaining Principal Components of selected axes...") + log.pca=log.pca$ind$coord[,1:nbaxes] + datLog=round(log.pca,4) + + + } else + + + if(pcaYesNo=="nopca"){ + datLog=datSpecies + } else stop("pcaYesNo must be pca or nopca") + + Store(objects()) + gc(reset=TRUE) + + print(" --- end of step 2 ---") + print(Sys.time()-t1) + + return(datLog) + +} + diff --git a/vmstools/R/getndp.R b/vmstools/R/getndp.R index 08095e9..b0b6e53 100644 --- a/vmstools/R/getndp.R +++ b/vmstools/R/getndp.R @@ -1,10 +1,26 @@ -`getndp` <- -function(x, tol=2*.Machine$double.eps) -{ - ndp <- 0 - while(!isTRUE(all.equal(x, round(x, ndp), tol=tol))) ndp <- ndp+1 - if(ndp > -log10(tol)) warning("Tolerance reached, ndp possibly -underestimated.") - ndp -} - +#' Get Number of Decimal Places +#' 
+#' Return the number of decimal places of a 'numeric'. +#' +#' +#' @param x Number to find decimal places from +#' @param tol Tolerance to use +#' @note Function not created under EU lot 2 project but found under R-help. +#' Please look there for credits. +#' @author See R-help pages +#' @references EU lot 2 project +#' @examples +#' +#' getndp(5.677) #result: 3 +#' +#' @export getndp +`getndp` <- +function(x, tol=2*.Machine$double.eps) +{ + ndp <- 0 + while(!isTRUE(all.equal(x, round(x, ndp), tol=tol))) ndp <- ndp+1 + if(ndp > -log10(tol)) warning("Tolerance reached, ndp possibly +underestimated.") + ndp +} + diff --git a/vmstools/R/indicators.r b/vmstools/R/indicators.r index 8b445ad..c7c98a1 100644 --- a/vmstools/R/indicators.r +++ b/vmstools/R/indicators.r @@ -1,135 +1,285 @@ -## indicators.r -## by Fabrizio Manco, 14/02/2011 -## calculates the DCF indicators 5,6 or 7 - -indicators <- function ( indicatorNum=5, # indicator 5, 6 or 7 - tacsat, # tacsat-like input data - minThreshold=10, # if time interval has been calculated (and named SI_INTV), it's a minimal nb of minutes, otherwise, it's minimal number of points - pctThreshold=90, # specific to indicator 6, percentage of points to include - ltGear="", # a list of gear code to keep for the analysis /!\ gear code field must be called LE_GEAR - inShapeArea="", # specific to indicator 7, the name of the shapefile without the .shp extension - cellresX=0.05, # grid cell resolution, x axis - cellresY=0.05, # grid cell resolution, y axis - calcAreaMethod="Trapezoid", # "Trapezoid" (fast and less accurate, good for small cellsizes) or "UTM" (accurate but slow, good for huge cellsizes) - plotMapTF=FALSE, - exportGridName="", # if not empty, the monthly (DCF5 and 6) grids will be exported as an ASCII grid (.asc) named with this string, DCF number and month number - exportTableName="" - ) - { - # for all indicators - #remove rows with NA - tacsat<-tacsat[complete.cases(tacsat),] - # gear filtering: intially only 
necessary for DCF 7 ("area impacted by mobile bottom gears"), but might be useful for other indicators - # keep only the gear codes listed in mobileBottomGear, and tacsat must have a LE_GEAR column - if (length(ltGear)>1 & !is.null(tacsat$LE_GEAR)) {tacsat<-subset(tacsat, tacsat$LE_GEAR %in% ltGear)} - - if (indicatorNum==7) - { - #### DCF INDICATOR 7 - Areas not impacted by mobile bottom gears #### - - require(shapefiles) - require(sp) - require(PBSmapping) - - if (inShapeArea!="") - { # read the shapefile - shapeAll<-read.shapefile(inShapeArea) - - # clip the shape polygon with the land - clipShapeFromLand<-clipPolygons (shapeAll, europa) - - vmsPingsCoord<-cbind(tacsat$SI_LONG, tacsat$SI_LATI) - pointInOutByPoly<-rep(0,length(vmsPingsCoord[,1])) - - ltPoly<-unique(clipShapeFromLand$PID) - - # points in polygons - for (x in 1:length(ltPoly)){ - polyCoord<-cbind(clipShapeFromLand$X[clipShapeFromLand$PID==ltPoly[x]],clipShapeFromLand$Y[clipShapeFromLand$PID==ltPoly[x]]) - pointInOutByPoly<-pointInOutByPoly + point.in.polygon(vmsPingsCoord[,1], vmsPingsCoord[,2], polyCoord[,1], polyCoord[,2]) - } - - tacsat$pointInOut<-pointInOutByPoly - tacsat<-subset(tacsat, pointInOut!=0) - } - - # Grid the points - if ("SI_INTV" %in% colnames(tacsat)) { nameVarToSum="SI_INTV"} else {nameVarToSum=""} - if (exportGridName!="") {outGridFileName<-paste(exportGridName,"_DCF",indicatorNum,".asc",sep="")} else {outGridFileName<-""} - vmsGrid<-vmsGridCreate(tacsat, nameLon = "SI_LONG", nameLat = "SI_LATI", cellsizeX=cellresX, cellsizeY=cellresY, nameVarToSum, plotMap=plotMapTF, plotPoints = FALSE, outGridFile=outGridFileName) - - # calculate the area of each cell in square km - vmsGrid<-surface(vmsGrid, method=calcAreaMethod, includeNA=TRUE) - - if (inShapeArea!="") - { - # specify which grid cell is in the polygon - gridPointsCoord<-coordinates(vmsGrid) - gridCellInOutByPoly<-rep(0,length(gridPointsCoord[,1])) - - # cells in polygon - for (x in 1:length(ltPoly)){ - 
polyCoord<-cbind(clipShapeFromLand$X[clipShapeFromLand$PID==ltPoly[x]],clipShapeFromLand$Y[clipShapeFromLand$PID==ltPoly[x]]) - gridCellInOutByPoly<-gridCellInOutByPoly + point.in.polygon(gridPointsCoord[,1], gridPointsCoord[,2], polyCoord[,1], polyCoord[,2]) - } - - vmsGrid$inPolygon<-gridCellInOutByPoly - areaInPolygon<-sum(vmsGrid@data$cellArea[vmsGrid@data$inPolygon==1]) - - } else {areaInPolygon<-sum(vmsGrid@data$cellArea)} # if no shapefile area is provided the entire rectangle containing the pings is processed - - # calculate the areas - areaFishing<-sum(vmsGrid@data$cellArea[!is.na(vmsGrid@data$fishing) & vmsGrid@data$fishing>minThreshold]) - - # calculate the result - tableResultDCF<-areaInPolygon-areaFishing - } - - #### DCF INDICATORS 5 AND 6 #### - if (indicatorNum==5 | indicatorNum==6) - { - # commons for DCF 5 and 6 - if (!"SI_DATIM" %in% colnames(tacsat)) {tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME,sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M")} - ltMonth<-unique(as.numeric(format(tacsat$SI_DATIM, format="%m"))) - ltMonth<-sort(ltMonth) - tableResultDCF=data.frame(month=ltMonth, DCF=rep(0, length(ltMonth))) - - # monthly process - for (x in 1:length(ltMonth)){ - currMonth<-ltMonth[x] - monthlyTacsat<-subset(tacsat, as.numeric(format(tacsat$SI_DATIM, format="%m"))==currMonth) - - if (indicatorNum==5) - { - # specific to DCF 5 - if ("SI_INTV" %in% colnames(tacsat)) {nameVarToSum="SI_INTV"} else {nameVarToSum=""} - } - - if (indicatorNum==6) - { - # specific to DCF 6 - # flag the pings inside the MCP according to the pctThreshold - monthlyTacsat<-tacsatMCP(monthlyTacsat, pctThreshold) - # grid the vms pings inside the MCP - monthlyTacsat<-subset(monthlyTacsat, monthlyTacsat$INMCP!=0) - nameVarToSum="" - } - - # Create the grid - if (plotMapTF) {x11()} - if (exportGridName!="") {outGridFileName<-paste(exportGridName,"_DCF",indicatorNum,"_Month", currMonth,".asc",sep="")} else {outGridFileName<-""} - 
monthlyVmsGrid<-vmsGridCreate(monthlyTacsat, nameLon = "SI_LONG", nameLat = "SI_LATI", cellsizeX=cellresX, cellsizeY=cellresY, nameVarToSum, plotMap=plotMapTF, plotTitle=paste("Month ", currMonth), plotPoints = FALSE, outGridFile=outGridFileName) - if (plotMapTF==TRUE & indicatorNum==6) {plot_mcp(plotnew=FALSE, plotpoints=FALSE, titletxt="")} # plot the specific DCF 6 MCP - - # calculate the area of each cell in square km and store it into a table - monthlyVmsGrid<-surface(monthlyVmsGrid, method=calcAreaMethod, includeNA=FALSE) - tableResultDCF[x,2]<-sum(monthlyVmsGrid@data$cellArea[!is.na(monthlyVmsGrid@data$fishing) & monthlyVmsGrid@data$fishing>minThreshold]) - } - if (exportTableName!="") - { - # export the table as a csv in the wd - write.csv(tableResultDCF, paste(exportTableName, indicatorNum, ".csv", sep="")) - } - } - return(tableResultDCF) - } \ No newline at end of file +## indicators.r +## by Fabrizio Manco, 14/02/2011 +## calculates the DCF indicators 5,6 or 7 + + + +#' Calculate the DCF indicators +#' +#' This function estimates the DCF indicators 5, 6 and 7 from the tacsat +#' dataset. +#' +#' SUMMARY +#' +#' The EU Data Collection Framework (DCF) standardizes three indicators to +#' analyse the fishing activity. They are summarised as follow: +#' +#' DCF Indicator 5: Distribution of fishing activities. The spatial extent of +#' fishing activity based on the total area of grids within which VMS records +#' were obtained, each month; DCF Indicator 6: Aggregation of fishing +#' activities. The extent to which fishing activity is aggregated based on the +#' total area of grids within which 90 percent of VMS records were obtained, +#' each month. DCF Indicator 7: Areas not impacted by mobile bottom gears. The +#' area of seabed that has not been impacted by mobile bottom fishing gears in +#' the last year. Could be reported annually and would state the total +#' proportion of the area by depth strata in each marine region. 
+#' +#' METHODS +#' +#' These indicators aggregate the tacsat point data into a gridded data frame +#' using the functions mapGrid.r and vmsGridCreate.r and therefore the +#' resolution of the grid (cell size) must be defined. +#' +#' DCF 5 calculates the total area of a grid of cells with fishing activity +#' which is above a minimum threshold of number of pings or number of fishing +#' hours (if the tacsat data contains a field with time interval between two +#' points called SI_INTV, then the threshold will be a minimal number of hours, +#' otherwise it will be a minimal number of points). The area of each cell is +#' calculated with the function surface.r either via a fast and rough method +#' using a trapezoid approximation (option "Trapezoid"), either via a more +#' precise but slow method using a Universal Transverse Mercator projection +#' (option "UTM"). The first method is fine for big grids of small cell sizes, +#' the second one is better for large cell sizes. This total fishing area is +#' processed by month. +#' +#' DCF 6 also calculates the total area of a grid with fishing activity but +#' keeps only the 90 percent of the points by discarding the outer 10\% points +#' (or any other specified percentage). It uses the function tacsatMCP.r +#' adapted from the aspace library. This function draws a minimum convex +#' polygon around the central points to keep. Then these points are gridded and +#' the total area of the cells is calculated with the surface.r function with +#' the same optional methods as DCF 5. This total fishing area is processed by +#' month. +#' +#' DCF 7 calculates the total area of a specified polygon not impacted by +#' mobile bottom gear. It therefore needs that the tacsat data has been merged +#' with the logbooks in order to have a gear code (or others) for each vms +#' point. The indicator needs a list of gear code to include as mobile bottom +#' gears (if empty, all the points will be included). 
The specified area to be +#' processed is a polygon shapefile. This polygon (or group of polygons) is +#' then clipped with the Europe polygon to be sure that the indicator won't +#' include land in its area calculation. If no shapefile is defined, the area +#' of the bounding box containing all the vms points will be considered. The +#' result is the area of the polygon less the area of the grid where fishing +#' activity occurs. The vms pings are gridded with an optional threshold in +#' either minimal of fishing hours or minimal number of points (see DCF 5). The +#' area of each grid cell is calculated with the surface.r function (see DCF 5 +#' or DCF 6). +#' +#' @param indicatorNum The indicator's number (5,6 or 7) +#' @param tacsat The vms dataframe with tacsat format +#' @param minThreshold The threshold value to consider a cell being "fished" in +#' time if the ping time interval has been calculated and named SI_INTV, +#' otherwise in number of pings +#' @param pctThreshold The threshold value representing the percentage of +#' points to include in the Minimal Convex Polygon for Indicator 6 +#' @param ltGear The list of gear codes to consider, if a gear code is present +#' in the tacsat dataframe and named LE_GEAR +#' @param inShapeArea The input shapefile to consider for DCF Indicator 7; path +#' and namefile without the extension (.shp) +#' @param cellresX The cell size along axis X +#' @param cellresY The cell size along axis Y +#' @param calcAreaMethod The method used to calculate the cell area, can be +#' "Trapezoid" (quick and less acurate) or "UTM" (slow and accurate) +#' @param plotMapTF Plot the maps +#' @param exportGridName If mentionned, each grid will be exported as a ASCII +#' grid +#' @param exportTableName Name of the csv file containing the results +#' @return For DCF Indicator 5: a list of monthly areas is returned, saved (if +#' exportTableName is populated) and monthly grids are exported (if +#' exportGridName is populated) For DCF 
Indicator 6: a list of monthly areas is +#' returned, saved (if exportTableName is populated) and monthly grids are +#' exported (if exportGridName is populated) For DCF Indicator 7: a annual +#' value is returned and a grid is exported (if exportGridName is populated) +#' @author Fabrizio Manco +#' @seealso \code{\link{mapGrids}} \code{\link{vmsGridCreate}} +#' \code{\link{tacsatMCP}} \code{\link{surface}} +#' @references EU lot 2 project +#' @examples +#' +#' # load the library +#' library(vmstools) +#' # Load the tacsat data +#' data(tacsat) +#' +#' # process the tacsat data: +#' # mandatory if you want the gridding based on a time threshold +#' # (minimal number of hours) +#' #pointInHarbour.r +#' #filterTacsat.r +#' #intervalTacsat.r +#' +#' +#' # load the eflalo +#' data(eflalo) +#' # merge eflalo and tacsat # mandatory for DCF Indicator 7 to +#' # consider only a list of gear codes +#' +#' # DCF Indicator 5 +#' indicators(indicatorNum=5, +#' tacsat, +#' minThreshold=0, +#' cellresX=0.05, +#' cellresY=0.05, +#' calcAreaMethod="Trapezoid", +#' plotMapTF=TRUE, +#' exportTableName="", +#' exportGridName="") +#' +#' # DCF Indicator 6 +#' indicators(indicatorNum=6, +#' tacsat, +#' pctThreshold=90, +#' cellresX=0.05, +#' cellresY=0.05, +#' calcAreaMethod="Trapezoid", +#' plotMapTF=TRUE, +#' exportTableName="", +#' exportGridName="") +#' +#' # DCF Indicator 7 +#' \dontrun{ +#' indicators(indicatorNum=7, +#' tacsat, +#' ltGear=c("TBB","OTB","PTB","DRB","DRH"), +#' inShapeArea="Shapefile", +#' cellresX=0.05, +#' cellresY=0.05, +#' calcAreaMethod="Trapezoid", +#' minThreshold=0, +#' plotMapTF=TRUE, +#' exportGridName="") +#' } +#' +#' @export indicators +indicators <- function ( indicatorNum=5, # indicator 5, 6 or 7 + tacsat, # tacsat-like input data + minThreshold=10, # if time interval has been calculated (and named SI_INTV), it's a minimal nb of minutes, otherwise, it's minimal number of points + pctThreshold=90, # specific to indicator 6, percentage of points to 
include + ltGear="", # a list of gear code to keep for the analysis /!\ gear code field must be called LE_GEAR + inShapeArea="", # specific to indicator 7, the name of the shapefile without the .shp extension + cellresX=0.05, # grid cell resolution, x axis + cellresY=0.05, # grid cell resolution, y axis + calcAreaMethod="Trapezoid", # "Trapezoid" (fast and less accurate, good for small cellsizes) or "UTM" (accurate but slow, good for huge cellsizes) + plotMapTF=FALSE, + exportGridName="", # if not empty, the monthly (DCF5 and 6) grids will be exported as an ASCII grid (.asc) named with this string, DCF number and month number + exportTableName="" + ) + { + # for all indicators + #remove rows with NA + tacsat<-tacsat[complete.cases(tacsat),] + # gear filtering: intially only necessary for DCF 7 ("area impacted by mobile bottom gears"), but might be useful for other indicators + # keep only the gear codes listed in mobileBottomGear, and tacsat must have a LE_GEAR column + if (length(ltGear)>1 & !is.null(tacsat$LE_GEAR)) {tacsat<-subset(tacsat, tacsat$LE_GEAR %in% ltGear)} + + if (indicatorNum==7) + { + #### DCF INDICATOR 7 - Areas not impacted by mobile bottom gears #### + + require(shapefiles) + require(sp) + require(PBSmapping) + + if (inShapeArea!="") + { # read the shapefile + shapeAll<-read.shapefile(inShapeArea) + + # clip the shape polygon with the land + clipShapeFromLand<-clipPolygons (shapeAll, europa) + + vmsPingsCoord<-cbind(tacsat$SI_LONG, tacsat$SI_LATI) + pointInOutByPoly<-rep(0,length(vmsPingsCoord[,1])) + + ltPoly<-unique(clipShapeFromLand$PID) + + # points in polygons + for (x in 1:length(ltPoly)){ + polyCoord<-cbind(clipShapeFromLand$X[clipShapeFromLand$PID==ltPoly[x]],clipShapeFromLand$Y[clipShapeFromLand$PID==ltPoly[x]]) + pointInOutByPoly<-pointInOutByPoly + point.in.polygon(vmsPingsCoord[,1], vmsPingsCoord[,2], polyCoord[,1], polyCoord[,2]) + } + + tacsat$pointInOut<-pointInOutByPoly + tacsat<-subset(tacsat, pointInOut!=0) + } + + # Grid the 
points + if ("SI_INTV" %in% colnames(tacsat)) { nameVarToSum="SI_INTV"} else {nameVarToSum=""} + if (exportGridName!="") {outGridFileName<-paste(exportGridName,"_DCF",indicatorNum,".asc",sep="")} else {outGridFileName<-""} + vmsGrid<-vmsGridCreate(tacsat, nameLon = "SI_LONG", nameLat = "SI_LATI", cellsizeX=cellresX, cellsizeY=cellresY, nameVarToSum, plotMap=plotMapTF, plotPoints = FALSE, outGridFile=outGridFileName) + + # calculate the area of each cell in square km + vmsGrid<-surface(vmsGrid, method=calcAreaMethod, includeNA=TRUE) + + if (inShapeArea!="") + { + # specify which grid cell is in the polygon + gridPointsCoord<-coordinates(vmsGrid) + gridCellInOutByPoly<-rep(0,length(gridPointsCoord[,1])) + + # cells in polygon + for (x in 1:length(ltPoly)){ + polyCoord<-cbind(clipShapeFromLand$X[clipShapeFromLand$PID==ltPoly[x]],clipShapeFromLand$Y[clipShapeFromLand$PID==ltPoly[x]]) + gridCellInOutByPoly<-gridCellInOutByPoly + point.in.polygon(gridPointsCoord[,1], gridPointsCoord[,2], polyCoord[,1], polyCoord[,2]) + } + + vmsGrid$inPolygon<-gridCellInOutByPoly + areaInPolygon<-sum(vmsGrid@data$cellArea[vmsGrid@data$inPolygon==1]) + + } else {areaInPolygon<-sum(vmsGrid@data$cellArea)} # if no shapefile area is provided the entire rectangle containing the pings is processed + + # calculate the areas + areaFishing<-sum(vmsGrid@data$cellArea[!is.na(vmsGrid@data$fishing) & vmsGrid@data$fishing>minThreshold]) + + # calculate the result + tableResultDCF<-areaInPolygon-areaFishing + } + + #### DCF INDICATORS 5 AND 6 #### + if (indicatorNum==5 | indicatorNum==6) + { + # commons for DCF 5 and 6 + if (!"SI_DATIM" %in% colnames(tacsat)) {tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME,sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M")} + ltMonth<-unique(as.numeric(format(tacsat$SI_DATIM, format="%m"))) + ltMonth<-sort(ltMonth) + tableResultDCF=data.frame(month=ltMonth, DCF=rep(0, length(ltMonth))) + + # monthly process + for (x in 1:length(ltMonth)){ + 
currMonth<-ltMonth[x] + monthlyTacsat<-subset(tacsat, as.numeric(format(tacsat$SI_DATIM, format="%m"))==currMonth) + + if (indicatorNum==5) + { + # specific to DCF 5 + if ("SI_INTV" %in% colnames(tacsat)) {nameVarToSum="SI_INTV"} else {nameVarToSum=""} + } + + if (indicatorNum==6) + { + # specific to DCF 6 + # flag the pings inside the MCP according to the pctThreshold + monthlyTacsat<-tacsatMCP(monthlyTacsat, pctThreshold) + # grid the vms pings inside the MCP + monthlyTacsat<-subset(monthlyTacsat, monthlyTacsat$INMCP!=0) + nameVarToSum="" + } + + # Create the grid + if (plotMapTF) {x11()} + if (exportGridName!="") {outGridFileName<-paste(exportGridName,"_DCF",indicatorNum,"_Month", currMonth,".asc",sep="")} else {outGridFileName<-""} + monthlyVmsGrid<-vmsGridCreate(monthlyTacsat, nameLon = "SI_LONG", nameLat = "SI_LATI", cellsizeX=cellresX, cellsizeY=cellresY, nameVarToSum, plotMap=plotMapTF, plotTitle=paste("Month ", currMonth), plotPoints = FALSE, outGridFile=outGridFileName) + if (plotMapTF==TRUE & indicatorNum==6) {plot_mcp(plotnew=FALSE, plotpoints=FALSE, titletxt="")} # plot the specific DCF 6 MCP + + # calculate the area of each cell in square km and store it into a table + monthlyVmsGrid<-surface(monthlyVmsGrid, method=calcAreaMethod, includeNA=FALSE) + tableResultDCF[x,2]<-sum(monthlyVmsGrid@data$cellArea[!is.na(monthlyVmsGrid@data$fishing) & monthlyVmsGrid@data$fishing>minThreshold]) + } + if (exportTableName!="") + { + # export the table as a csv in the wd + write.csv(tableResultDCF, paste(exportTableName, indicatorNum, ".csv", sep="")) + } + } + return(tableResultDCF) + } diff --git a/vmstools/R/interCubicHermiteSpline.r b/vmstools/R/interCubicHermiteSpline.r index df24ebe..2253bb2 100644 --- a/vmstools/R/interCubicHermiteSpline.r +++ b/vmstools/R/interCubicHermiteSpline.r @@ -1,44 +1,57 @@ - -interCubicHermiteSpline <- function(spltx,spltCon,res,params,headingAdjustment){ - - #Formula of Cubic Hermite Spline - t <- seq(0,1,length.out=res) - F00 <- 
2*t^3 -3*t^2 + 1 - F10 <- t^3-2*t^2+t - F01 <- -2*t^3+3*t^2 - F11 <- t^3-t^2 - - #Making tacsat dataset ready - spltx[spltCon[,1],"SI_HE"][which(is.na(spltx[spltCon[,1],"SI_HE"]))] <- 0 - spltx[spltCon[,2],"SI_HE"][which(is.na(spltx[spltCon[,2],"SI_HE"]))] <- 0 - - #Heading at begin point in degrees - Hx0 <- sin(spltx[spltCon[,1],"SI_HE"]/(180/pi)) - Hy0 <- cos(spltx[spltCon[,1],"SI_HE"]/(180/pi)) - - #Heading at end point in degrees - Hx1 <- sin(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi)) - Hy1 <- cos(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi)) - - #Start and end positions - Mx0 <- spltx[spltCon[,1],"SI_LONG"] - Mx1 <- spltx[spltCon[,2],"SI_LONG"] - My0 <- spltx[spltCon[,1],"SI_LATI"] - My1 <- spltx[spltCon[,2],"SI_LATI"] - - #Corrected for longitude lattitude effect - Hx0 <- Hx0 * params$fm * spltx[spltCon[,1],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1]) - Hx1 <- Hx1 * params$fm * spltx[spltCon[,2],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1]) - Hy0 <- Hy0 * params$fm * lonLatRatio(spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,1],"SI_LATI"]) * spltx[spltCon[,1],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1]) - Hy1 <- Hy1 * params$fm * lonLatRatio(spltx[spltCon[,2],"SI_LONG"],spltx[spltCon[,2],"SI_LATI"]) * spltx[spltCon[,2],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1]) - - #Get the interpolation - fx <- outer(F00,Mx0,"*")+outer(F10,Hx0,"*")+outer(F01,Mx1,"*")+outer(F11,Hx1,"*") - fy <- outer(F00,My0,"*")+outer(F10,Hy0,"*")+outer(F01,My1,"*")+outer(F11,Hy1,"*") - - #Create output format - intsx <- lapply(as.list(1:nrow(spltCon)),function(x){ - matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2, - dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))}) - return(intsx)} - +#' Interpolate using Cubic Hermite Spline +#' +#' This is an internal function of interpolateTacsat. Function to interpolate +#' VMS data using a cubic hermite spline function. 
+#' +#' +#' @param spltx Tacsat dataset split up by vessel using 'split'. +#' @param spltCon List of VMS records that are connected +#' @param res Number of additional points to use for interpolation +#' @param params Parameters to use for interpolation +#' @param headingAdjustment Parameter to adjust the choice of heading depending +#' on its own or previous point +#' @author Niels T. Hintzen +#' @export interCubicHermiteSpline +interCubicHermiteSpline <- function(spltx,spltCon,res,params,headingAdjustment){ + + #Formula of Cubic Hermite Spline + t <- seq(0,1,length.out=res) + F00 <- 2*t^3 -3*t^2 + 1 + F10 <- t^3-2*t^2+t + F01 <- -2*t^3+3*t^2 + F11 <- t^3-t^2 + + #Making tacsat dataset ready + spltx[spltCon[,1],"SI_HE"][which(is.na(spltx[spltCon[,1],"SI_HE"]))] <- 0 + spltx[spltCon[,2],"SI_HE"][which(is.na(spltx[spltCon[,2],"SI_HE"]))] <- 0 + + #Heading at begin point in degrees + Hx0 <- sin(spltx[spltCon[,1],"SI_HE"]/(180/pi)) + Hy0 <- cos(spltx[spltCon[,1],"SI_HE"]/(180/pi)) + + #Heading at end point in degrees + Hx1 <- sin(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi)) + Hy1 <- cos(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi)) + + #Start and end positions + Mx0 <- spltx[spltCon[,1],"SI_LONG"] + Mx1 <- spltx[spltCon[,2],"SI_LONG"] + My0 <- spltx[spltCon[,1],"SI_LATI"] + My1 <- spltx[spltCon[,2],"SI_LATI"] + + #Corrected for longitude lattitude effect + Hx0 <- Hx0 * params$fm * spltx[spltCon[,1],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1]) + Hx1 <- Hx1 * params$fm * spltx[spltCon[,2],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1]) + Hy0 <- Hy0 * params$fm * lonLatRatio(spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,1],"SI_LATI"]) * spltx[spltCon[,1],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1]) + Hy1 <- Hy1 * params$fm * lonLatRatio(spltx[spltCon[,2],"SI_LONG"],spltx[spltCon[,2],"SI_LATI"]) * spltx[spltCon[,2],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1]) + + #Get the interpolation + fx <- 
outer(F00,Mx0,"*")+outer(F10,Hx0,"*")+outer(F01,Mx1,"*")+outer(F11,Hx1,"*") + fy <- outer(F00,My0,"*")+outer(F10,Hy0,"*")+outer(F01,My1,"*")+outer(F11,Hy1,"*") + + #Create output format + intsx <- lapply(as.list(1:nrow(spltCon)),function(x){ + matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2, + dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))}) + return(intsx)} + diff --git a/vmstools/R/interStraightLine.r b/vmstools/R/interStraightLine.r index 1f5de2b..013e5dd 100644 --- a/vmstools/R/interStraightLine.r +++ b/vmstools/R/interStraightLine.r @@ -1,12 +1,22 @@ - -interStraightLine <- function(spltx,spltCon,res){ - - fx <- mapply(seq,spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,2],"SI_LONG"],length.out=res) - fy <- mapply(seq,spltx[spltCon[,1],"SI_LATI"],spltx[spltCon[,2],"SI_LATI"],length.out=res) - - #Create output format - intsx <- lapply(as.list(1:nrow(spltCon)),function(x){ - matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2, - dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))}) - return(intsx)} - +#' Interpolate using Straight line +#' +#' This is an internal function of interpolateTacsat. Function to interpolate +#' VMS data using a straight line. +#' +#' +#' @param spltx Tacsat dataset split up by vessel using 'split'. +#' @param spltCon List of VMS records that are connected +#' @param res Number of additional points to use for interpolation +#' @author Niels T. 
Hintzen +#' @export interStraightLine +interStraightLine <- function(spltx,spltCon,res){ + + fx <- mapply(seq,spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,2],"SI_LONG"],length.out=res) + fy <- mapply(seq,spltx[spltCon[,1],"SI_LATI"],spltx[spltCon[,2],"SI_LATI"],length.out=res) + + #Create output format + intsx <- lapply(as.list(1:nrow(spltCon)),function(x){ + matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2, + dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))}) + return(intsx)} + diff --git a/vmstools/R/interpolateTacsat.r b/vmstools/R/interpolateTacsat.r index eadd097..9c3f6a1 100644 --- a/vmstools/R/interpolateTacsat.r +++ b/vmstools/R/interpolateTacsat.r @@ -1,155 +1,227 @@ -`interpolateTacsat` <- -function(tacsat #VMS datapoints - ,interval=120 #Specify in minutes, NULL means use all points - ,margin=12 #Specify the margin in minutes that the interval might deviate in a search for the next point - ,res=100 #Resolution of interpolation method (default = 100) - ,method="cHs" #Specify the method to be used: Straight line (SL) of cubic Hermite spline (cHs) - ,params=list(fm=0.5,distscale=20,sigline=0.2,st=c(2,6)) #Specify the three parameters: fm, distscale, sigline, speedthreshold - ,headingAdjustment=0 - ,fast=FALSE){ - -if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") -tacsat$sort.int <- 1:nrow(tacsat) -tacsat <- sortTacsat(tacsat) -if(any(diff(tacsat$sort.int)!=1)) stop("Sort tacsat first using 'sortTacsat'") - #Start interpolating the data -if(!method %in% c("cHs","SL")) stop("method selected that does not exist") - -#------------------------------------------------------------------------------- -#Fast method or not -#------------------------------------------------------------------------------- -if(fast){ - #Interpolation only by vessel, so split tacsat up - tacsat$ID <- 1:nrow(tacsat) - splitTa <- split(tacsat,tacsat$VE_REF) - 
spltTaCon <- lapply(splitTa,function(spltx){ - #Calculate time different between every record - dftimex <- outer(spltx$SI_DATIM,spltx$SI_DATIM,difftime,units="mins") - iStep <- 1 - connect <- list() - counter <- 1 - #Loop over all possible combinations and store if a connection can be made - while(iStep <= nrow(spltx)){ - endp <- which(dftimex[,iStep] >= (interval - margin) & dftimex[,iStep] <= (interval + margin)) - if(length(endp)>0){ - if(length(endp)>1) endp <- endp[which.min(abs(interval - dftimex[endp,iStep]))][1] - connect[[counter]] <- c(iStep,endp) - counter <- counter + 1 - iStep <- endp - } else { iStep <- iStep + 1} - } - #Return matrix of conenctions - return(do.call(rbind,connect))}) - - if(method=="cHs") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){ - return(interCubicHermiteSpline(spltx=splitTa[[y]],spltCon=spltTaCon[[y]],res,params,headingAdjustment))}),recursive=FALSE) - if(method=="SL") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){ - return(interStraightLine(splitTa[[y]],spltTaCon[[y]],res))}),recursive=FALSE) - -} else { - - - #Initiate returning result object - returnInterpolations <- list() - - #Make vectors out of tacsat data to speed up interpolation - VE_REF <- tacsat$VE_REF - SI_LATI <- tacsat$SI_LATI - SI_LONG <- tacsat$SI_LONG - SI_SP <- tacsat$SI_SP - SI_HE <- tacsat$SI_HE - SI_DATIM <- tacsat$SI_DATIM - - #Start iterating over succeeding points - for(iStep in 1:(dim(tacsat)[1]-1)){ - #for(iStep in 1:(4558)){ - #print(iStep) - if(iStep == 1){ - iSuccess <- 0 - endDataSet <- 0 - startVMS <- 1 - ship <- VE_REF[startVMS] - } else { - if(is.na(endVMS)==TRUE) endVMS <- startVMS + 1 - startVMS <- endVMS - ship <- VE_REF[startVMS] - if(endDataSet == 1 & (rev(unique(VE_REF))[1] == ship | startVMS > length(VE_REF))) endDataSet <- 2 #Final end of dataset - } - - #if end of dataset is not reached, try to find succeeding point - if(endDataSet != 
2){ - idx <- which(VE_REF == VE_REF[startVMS]) - startidx <- which(idx == startVMS) - result <- findEndTacsat(SI_DATIM[idx],startVMS=startidx,interval,margin) - endVMS <- result[1]+idx[startidx] - endDataSet <- result[2] - - if(startVMS == dim(tacsat)[1] | (startVMS+1 == dim(tacsat)[1] & VE_REF[startVMS] != VE_REF[startVMS+1])){ - endDataSet <- 1 - endVMS <- NA - } - - if(is.na(endVMS)==TRUE) int <- 0 #No interpolation possible - if(is.na(endVMS)==FALSE) int <- 1 #Interpolation possible - #Interpolate according to the Cubic Hermite Spline method - if(method == "cHs" & int == 1){ - - #Define the cHs formula - F00 <- numeric() - F10 <- numeric() - F01 <- numeric() - F11 <- numeric() - i <- 0 - t <- seq(0,1,length.out=res) - F00 <- 2*t^3 -3*t^2 + 1 - F10 <- t^3-2*t^2+t - F01 <- -2*t^3+3*t^2 - F11 <- t^3-t^2 - - if (is.na(SI_HE[startVMS])=="TRUE") SI_HE[startVMS] <- 0 - if (is.na(SI_HE[endVMS])=="TRUE") SI_HE[endVMS] <- 0 - - #Heading at begin point in degrees - Hx0 <- sin(SI_HE[startVMS]/(180/pi)) - Hy0 <- cos(SI_HE[startVMS]/(180/pi)) - #Heading at end point in degrees - Hx1 <- sin(SI_HE[endVMS-headingAdjustment]/(180/pi)) - Hy1 <- cos(SI_HE[endVMS-headingAdjustment]/(180/pi)) - - Mx0 <- SI_LONG[startVMS] - Mx1 <- SI_LONG[endVMS] - My0 <- SI_LATI[startVMS] - My1 <- SI_LATI[endVMS] - - #Corrected for longitude lattitude effect - Hx0 <- Hx0 * params$fm * SI_SP[startVMS] /((params$st[2]-params$st[1])/2+params$st[1]) - Hx1 <- Hx1 * params$fm * SI_SP[endVMS] /((params$st[2]-params$st[1])/2+params$st[1]) - Hy0 <- Hy0 * params$fm * lonLatRatio(SI_LONG[c(startVMS,endVMS)],SI_LATI[c(startVMS,endVMS)])[1] * SI_SP[startVMS]/((params$st[2]-params$st[1])/2+params$st[1]) - Hy1 <- Hy1 * params$fm * lonLatRatio(SI_LONG[c(startVMS,endVMS)],SI_LATI[c(startVMS,endVMS)])[2] * SI_SP[endVMS]/((params$st[2]-params$st[1]) /2+params$st[1]) - - #Finalizing the interpolation based on cHs - fx <- numeric() - fy <- numeric() - fx <- F00*Mx0+F10*Hx0+F01*Mx1+F11*Hx1 - fy <- 
F00*My0+F10*Hy0+F01*My1+F11*Hy1 - - #Add one to list of successful interpolations - iSuccess <- iSuccess + 1 - returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y"))) - } - - #Interpolate according to a straight line - if(method == "SL" & int == 1){ - fx <- seq(SI_LONG[startVMS],SI_LONG[endVMS],length.out=res) - fy <- seq(SI_LATI[startVMS],SI_LATI[endVMS],length.out=res) - - #Add one to list of successful interpolations - iSuccess <- iSuccess + 1 - returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y"))) - } - } - } -} - -return(returnInterpolations)} - +#' Find interpolated points between two succeeding GPS datapoints +#' +#' Method to find interpolated points between two succeeding Tacsat / GPS +#' datapoints. Two different methods can be applied: the cubic Hermite spline +#' method as developed in Hintzen et al. 2010 or an interpolation of a straight +#' line (on a x-y plane, not a sphere) +#' +#' Interval: In most Tacsat datasets the succeeding datapoint can be found 1 or +#' 2 hours appart. This interval time should be specified here. Interval can +#' also be specified as e.g. 15 minutes if the Tacsat / GPS dataset allows +#' this. Margin: Hardly ever, the interval time is precise. To allow some +#' deviation from the 1 or 2 hour interval the margin can be adjusted. +#' +#' The 'res' value defines the number of points to use to construct the +#' interpolation. Default = 100 which results in a high resolution +#' interpolation. +#' +#' Params can be obtained from analyses (see Hintzen et al. 2010) or can be +#' varied to explore the differences. To interpolate only first value needs to +#' be specified and the speedtreshold needs to be given. 
+#' +#' headingAdjustment can either be 0 or 1 in case the heading at the endpoint +#' does not represent the heading of the arriving vessel to that point but the +#' departing vessel. This is only of use when heading needs to be calculated +#' outside the GPS dataset where heading is calculated at the bearing between +#' two points. For the startpoint, the bearing is correct, for the endpoint +#' however, the heading is incorrect. +#' +#' The object returned is a list. Each list item contains in the first row the +#' index of the succeeding Tacsat points interpolated. The second to last rows +#' contain the x (first column) and y (second column) interpolated values. The +#' length of the list indicates the number of interpolations created. +#' +#' @param tacsat Tacsat dataset with GPS points that need to be interpolated +#' @param interval Time in minutes between the succeeding datapoints +#' @param margin Deviation from specified interval to find succeeding +#' datapoints +#' @param res Number of points to use to create interpolation (including start +#' and end point), default = 100 +#' @param method "cHs" for cubic Hermite spline or "SL" for Straight Line +#' interpolation, default = "cHs" +#' @param params Params needed for cHs method, including the speed thresholds +#' used +#' @param headingAdjustment Parameter to adjust the choice of heading depending +#' on its own or previous point +#' @param fast If your machine holds enough RAM (4gig or more) then the fast +#' version runs 7x faster +#' @author Niels T. Hintzen +#' @seealso \code{\link{findEndTacsat}}, \code{\link{distance}}, +#' \code{\link{lonLatRatio}}, \code{\link{plotInterpolation}}, +#' \code{\link{sortTacsat}}, \code{\link{filterTacsat}} +#' @references Hintzen et al. 
2010 Improved estimation of trawling tracks using +#' cubic Hermite spline interpolation of position registration data, EU lot 2 +#' project +#' @examples +#' +#' #Load the Tacsat data +#' data(tacsat) +#' +#' #Sort the VMS data +#' tacsat <- sortTacsat(tacsat) +#' tacsat <- tacsat[1:1000,] +#' +#' #Filter the Tacsat data +#' tacsat <- filterTacsat(tacsat,st=c(2,6),hd=NULL) +#' +#' #Interpolate the VMS data +#' interpolation <- interpolateTacsat(tacsat,interval=120,margin=10,res=100, +#' method="cHs",params=list(fm=0.5,distscale=20, +#' sigline=0.2,st=c(2,6)),headingAdjustment=0) +#' +#' @export interpolateTacsat +`interpolateTacsat` <- +function(tacsat #VMS datapoints + ,interval=120 #Specify in minutes, NULL means use all points + ,margin=12 #Specify the margin in minutes that the interval might deviate in a search for the next point + ,res=100 #Resolution of interpolation method (default = 100) + ,method="cHs" #Specify the method to be used: Straight line (SL) of cubic Hermite spline (cHs) + ,params=list(fm=0.5,distscale=20,sigline=0.2,st=c(2,6)) #Specify the three parameters: fm, distscale, sigline, speedthreshold + ,headingAdjustment=0 + ,fast=FALSE){ + +if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") +tacsat$sort.int <- 1:nrow(tacsat) +tacsat <- sortTacsat(tacsat) +if(any(diff(tacsat$sort.int)!=1)) stop("Sort tacsat first using 'sortTacsat'") + #Start interpolating the data +if(!method %in% c("cHs","SL")) stop("method selected that does not exist") + +#------------------------------------------------------------------------------- +#Fast method or not +#------------------------------------------------------------------------------- +if(fast){ + #Interpolation only by vessel, so split tacsat up + tacsat$ID <- 1:nrow(tacsat) + splitTa <- split(tacsat,tacsat$VE_REF) + spltTaCon <- lapply(splitTa,function(spltx){ + #Calculate time different between every record 
+ dftimex <- outer(spltx$SI_DATIM,spltx$SI_DATIM,difftime,units="mins") + iStep <- 1 + connect <- list() + counter <- 1 + #Loop over all possible combinations and store if a connection can be made + while(iStep <= nrow(spltx)){ + endp <- which(dftimex[,iStep] >= (interval - margin) & dftimex[,iStep] <= (interval + margin)) + if(length(endp)>0){ + if(length(endp)>1) endp <- endp[which.min(abs(interval - dftimex[endp,iStep]))][1] + connect[[counter]] <- c(iStep,endp) + counter <- counter + 1 + iStep <- endp + } else { iStep <- iStep + 1} + } + #Return matrix of conenctions + return(do.call(rbind,connect))}) + + if(method=="cHs") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){ + return(interCubicHermiteSpline(spltx=splitTa[[y]],spltCon=spltTaCon[[y]],res,params,headingAdjustment))}),recursive=FALSE) + if(method=="SL") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){ + return(interStraightLine(splitTa[[y]],spltTaCon[[y]],res))}),recursive=FALSE) + +} else { + + + #Initiate returning result object + returnInterpolations <- list() + + #Make vectors out of tacsat data to speed up interpolation + VE_REF <- tacsat$VE_REF + SI_LATI <- tacsat$SI_LATI + SI_LONG <- tacsat$SI_LONG + SI_SP <- tacsat$SI_SP + SI_HE <- tacsat$SI_HE + SI_DATIM <- tacsat$SI_DATIM + + #Start iterating over succeeding points + for(iStep in 1:(dim(tacsat)[1]-1)){ + #for(iStep in 1:(4558)){ + #print(iStep) + if(iStep == 1){ + iSuccess <- 0 + endDataSet <- 0 + startVMS <- 1 + ship <- VE_REF[startVMS] + } else { + if(is.na(endVMS)==TRUE) endVMS <- startVMS + 1 + startVMS <- endVMS + ship <- VE_REF[startVMS] + if(endDataSet == 1 & (rev(unique(VE_REF))[1] == ship | startVMS > length(VE_REF))) endDataSet <- 2 #Final end of dataset + } + + #if end of dataset is not reached, try to find succeeding point + if(endDataSet != 2){ + idx <- which(VE_REF == VE_REF[startVMS]) + startidx <- which(idx == startVMS) + result 
<- findEndTacsat(SI_DATIM[idx],startVMS=startidx,interval,margin) + endVMS <- result[1]+idx[startidx] + endDataSet <- result[2] + + if(startVMS == dim(tacsat)[1] | (startVMS+1 == dim(tacsat)[1] & VE_REF[startVMS] != VE_REF[startVMS+1])){ + endDataSet <- 1 + endVMS <- NA + } + + if(is.na(endVMS)==TRUE) int <- 0 #No interpolation possible + if(is.na(endVMS)==FALSE) int <- 1 #Interpolation possible + #Interpolate according to the Cubic Hermite Spline method + if(method == "cHs" & int == 1){ + + #Define the cHs formula + F00 <- numeric() + F10 <- numeric() + F01 <- numeric() + F11 <- numeric() + i <- 0 + t <- seq(0,1,length.out=res) + F00 <- 2*t^3 -3*t^2 + 1 + F10 <- t^3-2*t^2+t + F01 <- -2*t^3+3*t^2 + F11 <- t^3-t^2 + + if (is.na(SI_HE[startVMS])=="TRUE") SI_HE[startVMS] <- 0 + if (is.na(SI_HE[endVMS])=="TRUE") SI_HE[endVMS] <- 0 + + #Heading at begin point in degrees + Hx0 <- sin(SI_HE[startVMS]/(180/pi)) + Hy0 <- cos(SI_HE[startVMS]/(180/pi)) + #Heading at end point in degrees + Hx1 <- sin(SI_HE[endVMS-headingAdjustment]/(180/pi)) + Hy1 <- cos(SI_HE[endVMS-headingAdjustment]/(180/pi)) + + Mx0 <- SI_LONG[startVMS] + Mx1 <- SI_LONG[endVMS] + My0 <- SI_LATI[startVMS] + My1 <- SI_LATI[endVMS] + + #Corrected for longitude lattitude effect + Hx0 <- Hx0 * params$fm * SI_SP[startVMS] /((params$st[2]-params$st[1])/2+params$st[1]) + Hx1 <- Hx1 * params$fm * SI_SP[endVMS] /((params$st[2]-params$st[1])/2+params$st[1]) + Hy0 <- Hy0 * params$fm * lonLatRatio(SI_LONG[c(startVMS,endVMS)],SI_LATI[c(startVMS,endVMS)])[1] * SI_SP[startVMS]/((params$st[2]-params$st[1])/2+params$st[1]) + Hy1 <- Hy1 * params$fm * lonLatRatio(SI_LONG[c(startVMS,endVMS)],SI_LATI[c(startVMS,endVMS)])[2] * SI_SP[endVMS]/((params$st[2]-params$st[1]) /2+params$st[1]) + + #Finalizing the interpolation based on cHs + fx <- numeric() + fy <- numeric() + fx <- F00*Mx0+F10*Hx0+F01*Mx1+F11*Hx1 + fy <- F00*My0+F10*Hy0+F01*My1+F11*Hy1 + + #Add one to list of successful interpolations + iSuccess <- iSuccess + 1 + 
returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y"))) + } + + #Interpolate according to a straight line + if(method == "SL" & int == 1){ + fx <- seq(SI_LONG[startVMS],SI_LONG[endVMS],length.out=res) + fy <- seq(SI_LATI[startVMS],SI_LATI[endVMS],length.out=res) + + #Add one to list of successful interpolations + iSuccess <- iSuccess + 1 + returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y"))) + } + } + } +} + +return(returnInterpolations)} + diff --git a/vmstools/R/interpolation2Tacsat.r b/vmstools/R/interpolation2Tacsat.r index efef156..7dbbd29 100644 --- a/vmstools/R/interpolation2Tacsat.r +++ b/vmstools/R/interpolation2Tacsat.r @@ -1,158 +1,206 @@ -interpolation2Tacsat <- function (interpolation, tacsat, npoints = 10, equalDist = TRUE) -{ - tacsat <- sortTacsat(tacsat) - if (!"HL_ID" %in% colnames(tacsat)) - tacsat$HL_ID <- 1:nrow(tacsat) - if (!"SI_DATIM" %in% colnames(tacsat)) - tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, - sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") - - #- Convert the interpolation to equally distance spaced points - if (equalDist) { - interpolationEQ <- equalDistance(interpolation, npoints) - } else { - interpolationEQ <- lapply(interpolation, function(x) { - idx <- round(seq(2, nrow(x), length.out = npoints)) - return(x[c(1, idx), ]) - }) - } - #- Get the interpolated longitudes and latitudes out - int <- lapply(interpolationEQ, function(x) { - x[-1, ] - }) - lenint <- lapply(int, length) - idx <- which(unlist(lenint) == (npoints * 2)) - int <- lapply(interpolationEQ[idx], function(x) { - x[-1, ] - }) - lenEQ <- length(interpolationEQ[idx]) - intmin2 <- lapply(int, function(x) { - x <- x[-c(1, nrow(x)), ] - colnames(x) <- c("SI_LONG", "SI_LATI") - return(x) - }) - intidx <- lapply(interpolationEQ[idx], function(x) { - x[1, ] - }) - 
- #- Make a unique ID per interpolation - INT_ID <- as.data.frame(do.call(rbind,intidx)) - INT_ID$INT_ID <- 1:nrow(INT_ID) - tacsat$INT_ID <- NA - for(iRow in 1:nrow(INT_ID)) - tacsat[INT_ID[iRow,1]:INT_ID[iRow,2],"INT_ID"] <- INT_ID$INT_ID[iRow] - - #- Aggregate tacsat information for all pings within an interpolation - idxNoIntTacsat <- which(is.na(tacsat$INT_ID)) - if(length(idxNoIntTacsat)>0){ - noIntTacsat <- tacsat[idxNoIntTacsat,] - intTacsat <- tacsat[-idxNoIntTacsat,] - - matchClnms <- c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))])[which(c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))]) %in% colnames(tacsat))] - aggInttacsat<- aggregate(tacsat[-idxNoIntTacsat,matchClnms],by=list(tacsat[-idxNoIntTacsat,"INT_ID"]),FUN=sum,na.rm=T) - colnames(aggInttacsat)[1] <- "INT_ID" - } else { - noIntTacsat <- numeric() - intTacsat <- tacsat - matchClnms <- c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))])[which(c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))]) %in% colnames(tacsat))] - aggInttacsat<- aggregate(tacsat[,matchClnms],by=list(tacsat[,"INT_ID"]),FUN=sum,na.rm=T) - colnames(aggInttacsat)[1] <- "INT_ID" - } - - #- get the column names that cannot be interpolated - clnames <- colnames(tacsat) - b <- clnames[!clnames %in% c("SI_LONG", "SI_LATI", - "SI_HE", "SI_SP", "SI_DATE", "SI_TIME", - "SI_DATIM")] - - #- Replicate values that cannot be interpolated - bvals <- lapply(as.list(1:lenEQ), function(x) { - matrix(unlist(tacsat[intidx[[x]][1], b]), nrow = npoints - - 2, ncol = length(b), byrow = TRUE, dimnames = list(round(seq(intidx[[x]][1], - intidx[[x]][2], length.out = npoints - 2), 3), b)) - }) - #- Apply a simple equal speading to the data values (Ideally we would make this dependent on SI_SP but then the equalDist should be modified too) - bvals <- lapply(as.list(1:lenEQ),function(x){ - dat2replace <- subset(aggInttacsat,INT_ID == bvals[[x]][1,"INT_ID"]) - 
if(length(kgeur(b))>0){ - for(iSpec in b[kgeur(b)]) - bvals[[x]][,iSpec] <- c(dat2replace[,iSpec]/(npoints-1)) - mode(bvals[[x]][,kgeur(b)]) <- "numeric" - } - if("INTV" %in% b){ - bvals[[x]][,"INTV"] <- c(dat2replace[,"INTV"]/(npoints-1)) - mode(bvals[[x]][,"INTV"]) <- "numeric" - } - if("LE_SURF" %in% b){ - bvals[[x]][,"LE_SURF"] <- c(dat2replace[,"LE_SURF"]/(npoints-1)) - mode(bvals[[x]][,"LE_SURF"]) <- "numeric" - } - if("LE_SUBSURF" %in% b){ - bvals[[x]][,"LE_SUBSURF"] <- c(dat2replace[,"LE_SUBSURF"]/(npoints-1)) - mode(bvals[[x]][,"LE_SUBSURF"]) <- "numeric" - } - return(bvals[[x]]) - }) - - SI_DATIMs <- data.frame(from = tacsat$SI_DATIM[do.call(rbind, - intidx)[, 1]], to = tacsat$SI_DATIM[do.call(rbind, intidx)[, - 2]]) - SI_DATIMs <- lapply(as.list(1:lenEQ), function(x) { - seq(SI_DATIMs[x, 1], SI_DATIMs[x, 2], length.out = npoints)[2:(npoints - - 1)] - }) - SI_DATE <- lapply(SI_DATIMs, function(x) { - format(x, format = "%d/%m/%Y") - }) - timeNotation <- ifelse(length(unlist(strsplit(tacsat$SI_TIME[1], - ":"))) > 2, "secs", "mins") - if (timeNotation == "secs") - SI_TIME <- lapply(SI_DATIMs, function(x) { - format(x, format = "%H:%M:%S") - }) - if (timeNotation == "mins") - SI_TIME <- lapply(SI_DATIMs, function(x) { - format(x, format = "%H:%M") - }) - SI_SPs <- as.matrix(data.frame(from = tacsat$SI_SP[do.call(rbind, - intidx)[, 1]], to = tacsat$SI_SP[do.call(rbind, intidx)[, - 2]])) - SI_SP <- mapply(seq, from = SI_SPs[, 1], to = SI_SPs[, 2], - length.out = npoints - 2) - SI_HE <- lapply(as.list(1:lenEQ), function(x) { - y <- int[[x]] - return(bearing(y[2:(nrow(y) - 1), 1], y[2:(nrow(y) - - 1), 2], y[3:nrow(y), 1], y[3:nrow(y), 2])) - }) - ret <- lapply(as.list(1:lenEQ), function(x) { - data.frame(bvals[[x]], intmin2[[x]], SI_DATIM = SI_DATIMs[[x]], - SI_DATE = SI_DATE[[x]], SI_TIME = SI_TIME[[x]], SI_SP = SI_SP[, - x], SI_HE = SI_HE[[x]], stringsAsFactors = F) - }) - - #- Need to apply the same equal spreading to the original tacsat data - matchClnms <- 
c("INTV","LE_SURF","LE_SUBSURF", - colnames(intTacsat)[kgeur(colnames(intTacsat))])[which(c("INTV","LE_SURF","LE_SUBSURF",colnames(intTacsat)[kgeur(colnames(intTacsat))]) %in% colnames(intTacsat))] - intTacsat[,matchClnms] <- intTacsat[,matchClnms] / (npoints-1) - - #- Combine all together again - interpolationTot <- do.call(rbind, ret) - interpolationTot <- formatTacsat(interpolationTot) - if(length(noIntTacsat)>0){ - tacsatInt <- rbindTacsat(rbindTacsat(intTacsat,noIntTacsat),interpolationTot) - } else { - tacsatInt <- rbindTacsat(intTacsat,interpolationTot) - } - for(iCol in c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsatInt)[kgeur(colnames(tacsatInt))])) - tacsatInt[,iCol] <- anf(tacsatInt[,iCol]) - tacsatInt <- sortTacsat(tacsatInt) - - #- Do consistency check - if(length(kgeur(colnames(tacsatInt)))>0){ - ratio <- sum(tacsatInt[,kgeur(colnames(tacsatInt))],na.rm=T)/sum(tacsat[,kgeur(colnames(tacsatInt))],na.rm=T) - if(ratio <= 0.99 | ratio >= 1.01 | is.na(ratio)) - warnings("Some kilo/euros/surface area swept is misplaced") - } - - return(tacsatInt) -} +#' Convert the output (a list) from interpolateTacsat to tacsat +#' +#' This function takes the list of tracks output by interpolateTacsat and +#' converts them back to tacsat format. This means that all the functions that +#' work on tacsat can work with this new, larger interpolated dataset. For +#' example you could now distribute the landings among the new pings. The +#' npoints argument is the optional number of pings between each real or actual +#' ping. +#' +#' The distances between the new vessel positions are assumed to be equal. +#' +#' @param interpolation A list of tracks between actual pings output by the +#' interpolateTacsat function. +#' @param tacsat Tacsat data used when the interpolateTacsat function was +#' called. +#' @param npoints The number of pings or positions required between each real +#' or actual vessel position or ping +#' @param equalDist Logical. 
Whether the number of positions returned should be +#' equally spaced or not +#' @return A data frame in tacsat format is returned with estimated locations, +#' speeds and headings between each real or actual vessel position. The column +#' added HL_ID refers to the ID given to each individual interpolation. +#' @author Niels Hintzen, Doug Beare +#' @seealso \code{\link{interpolateTacsat}} +#' @references EU Lot 2 project +#' @examples +#' +#' +#' data(tacsat) +#' # Take out first 100 rows +#' input <- tacsat[1:100,] +#' # set resolution for number of locations or pings required +#' # between each real or actual vessel location. +#' res1 <- 100 +#' +#' #Do the interpolation +#' interpolation <- interpolateTacsat(tacsat=input,interval=120, +#' margin=12,res=res1,method="cHs", +#' params=list(fm=0.4,distscale=20,sigline=0.2, +#' st=c(4,8)),headingAdjustment=0) +#' +#' #Convert the interpolated data back to tacsat with 10 locations or +#' # pings between each real or actual vessel location. 
+#' tacsat <- interpolation2Tacsat(interpolation=interpolation, +#' tacsat=input,npoints=10,equalDist=TRUE) +#' +#' +#' @export interpolation2Tacsat +interpolation2Tacsat <- function (interpolation, tacsat, npoints = 10, equalDist = TRUE) +{ + tacsat <- sortTacsat(tacsat) + if (!"HL_ID" %in% colnames(tacsat)) + tacsat$HL_ID <- 1:nrow(tacsat) + if (!"SI_DATIM" %in% colnames(tacsat)) + tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, + sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") + + #- Convert the interpolation to equally distance spaced points + if (equalDist) { + interpolationEQ <- equalDistance(interpolation, npoints) + } else { + interpolationEQ <- lapply(interpolation, function(x) { + idx <- round(seq(2, nrow(x), length.out = npoints)) + return(x[c(1, idx), ]) + }) + } + #- Get the interpolated longitudes and latitudes out + int <- lapply(interpolationEQ, function(x) { + x[-1, ] + }) + lenint <- lapply(int, length) + idx <- which(unlist(lenint) == (npoints * 2)) + int <- lapply(interpolationEQ[idx], function(x) { + x[-1, ] + }) + lenEQ <- length(interpolationEQ[idx]) + intmin2 <- lapply(int, function(x) { + x <- x[-c(1, nrow(x)), ] + colnames(x) <- c("SI_LONG", "SI_LATI") + return(x) + }) + intidx <- lapply(interpolationEQ[idx], function(x) { + x[1, ] + }) + + #- Make a unique ID per interpolation + INT_ID <- as.data.frame(do.call(rbind,intidx)) + INT_ID$INT_ID <- 1:nrow(INT_ID) + tacsat$INT_ID <- NA + for(iRow in 1:nrow(INT_ID)) + tacsat[INT_ID[iRow,1]:INT_ID[iRow,2],"INT_ID"] <- INT_ID$INT_ID[iRow] + + #- Aggregate tacsat information for all pings within an interpolation + idxNoIntTacsat <- which(is.na(tacsat$INT_ID)) + if(length(idxNoIntTacsat)>0){ + noIntTacsat <- tacsat[idxNoIntTacsat,] + intTacsat <- tacsat[-idxNoIntTacsat,] + + matchClnms <- c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))])[which(c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))]) %in% colnames(tacsat))] + 
aggInttacsat<- aggregate(tacsat[-idxNoIntTacsat,matchClnms],by=list(tacsat[-idxNoIntTacsat,"INT_ID"]),FUN=sum,na.rm=T) + colnames(aggInttacsat)[1] <- "INT_ID" + } else { + noIntTacsat <- numeric() + intTacsat <- tacsat + matchClnms <- c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))])[which(c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))]) %in% colnames(tacsat))] + aggInttacsat<- aggregate(tacsat[,matchClnms],by=list(tacsat[,"INT_ID"]),FUN=sum,na.rm=T) + colnames(aggInttacsat)[1] <- "INT_ID" + } + + #- get the column names that cannot be interpolated + clnames <- colnames(tacsat) + b <- clnames[!clnames %in% c("SI_LONG", "SI_LATI", + "SI_HE", "SI_SP", "SI_DATE", "SI_TIME", + "SI_DATIM")] + + #- Replicate values that cannot be interpolated + bvals <- lapply(as.list(1:lenEQ), function(x) { + matrix(unlist(tacsat[intidx[[x]][1], b]), nrow = npoints - + 2, ncol = length(b), byrow = TRUE, dimnames = list(round(seq(intidx[[x]][1], + intidx[[x]][2], length.out = npoints - 2), 3), b)) + }) + #- Apply a simple equal speading to the data values (Ideally we would make this dependent on SI_SP but then the equalDist should be modified too) + bvals <- lapply(as.list(1:lenEQ),function(x){ + dat2replace <- subset(aggInttacsat,INT_ID == bvals[[x]][1,"INT_ID"]) + if(length(kgeur(b))>0){ + for(iSpec in b[kgeur(b)]) + bvals[[x]][,iSpec] <- c(dat2replace[,iSpec]/(npoints-1)) + mode(bvals[[x]][,kgeur(b)]) <- "numeric" + } + if("INTV" %in% b){ + bvals[[x]][,"INTV"] <- c(dat2replace[,"INTV"]/(npoints-1)) + mode(bvals[[x]][,"INTV"]) <- "numeric" + } + if("LE_SURF" %in% b){ + bvals[[x]][,"LE_SURF"] <- c(dat2replace[,"LE_SURF"]/(npoints-1)) + mode(bvals[[x]][,"LE_SURF"]) <- "numeric" + } + if("LE_SUBSURF" %in% b){ + bvals[[x]][,"LE_SUBSURF"] <- c(dat2replace[,"LE_SUBSURF"]/(npoints-1)) + mode(bvals[[x]][,"LE_SUBSURF"]) <- "numeric" + } + return(bvals[[x]]) + }) + + SI_DATIMs <- data.frame(from = tacsat$SI_DATIM[do.call(rbind, + intidx)[, 1]], 
to = tacsat$SI_DATIM[do.call(rbind, intidx)[, + 2]]) + SI_DATIMs <- lapply(as.list(1:lenEQ), function(x) { + seq(SI_DATIMs[x, 1], SI_DATIMs[x, 2], length.out = npoints)[2:(npoints - + 1)] + }) + SI_DATE <- lapply(SI_DATIMs, function(x) { + format(x, format = "%d/%m/%Y") + }) + timeNotation <- ifelse(length(unlist(strsplit(tacsat$SI_TIME[1], + ":"))) > 2, "secs", "mins") + if (timeNotation == "secs") + SI_TIME <- lapply(SI_DATIMs, function(x) { + format(x, format = "%H:%M:%S") + }) + if (timeNotation == "mins") + SI_TIME <- lapply(SI_DATIMs, function(x) { + format(x, format = "%H:%M") + }) + SI_SPs <- as.matrix(data.frame(from = tacsat$SI_SP[do.call(rbind, + intidx)[, 1]], to = tacsat$SI_SP[do.call(rbind, intidx)[, + 2]])) + SI_SP <- mapply(seq, from = SI_SPs[, 1], to = SI_SPs[, 2], + length.out = npoints - 2) + SI_HE <- lapply(as.list(1:lenEQ), function(x) { + y <- int[[x]] + return(bearing(y[2:(nrow(y) - 1), 1], y[2:(nrow(y) - + 1), 2], y[3:nrow(y), 1], y[3:nrow(y), 2])) + }) + ret <- lapply(as.list(1:lenEQ), function(x) { + data.frame(bvals[[x]], intmin2[[x]], SI_DATIM = SI_DATIMs[[x]], + SI_DATE = SI_DATE[[x]], SI_TIME = SI_TIME[[x]], SI_SP = SI_SP[, + x], SI_HE = SI_HE[[x]], stringsAsFactors = F) + }) + + #- Need to apply the same equal spreading to the original tacsat data + matchClnms <- c("INTV","LE_SURF","LE_SUBSURF", + colnames(intTacsat)[kgeur(colnames(intTacsat))])[which(c("INTV","LE_SURF","LE_SUBSURF",colnames(intTacsat)[kgeur(colnames(intTacsat))]) %in% colnames(intTacsat))] + intTacsat[,matchClnms] <- intTacsat[,matchClnms] / (npoints-1) + + #- Combine all together again + interpolationTot <- do.call(rbind, ret) + interpolationTot <- formatTacsat(interpolationTot) + if(length(noIntTacsat)>0){ + tacsatInt <- rbindTacsat(rbindTacsat(intTacsat,noIntTacsat),interpolationTot) + } else { + tacsatInt <- rbindTacsat(intTacsat,interpolationTot) + } + for(iCol in c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsatInt)[kgeur(colnames(tacsatInt))])) + 
tacsatInt[,iCol] <- anf(tacsatInt[,iCol]) + tacsatInt <- sortTacsat(tacsatInt) + + #- Do consistency check + if(length(kgeur(colnames(tacsatInt)))>0){ + ratio <- sum(tacsatInt[,kgeur(colnames(tacsatInt))],na.rm=T)/sum(tacsat[,kgeur(colnames(tacsatInt))],na.rm=T) + if(ratio <= 0.99 | ratio >= 1.01 | is.na(ratio)) + warnings("Some kilo/euros/surface area swept is misplaced") + } + + return(tacsatInt) +} diff --git a/vmstools/R/intervalTacsat.r b/vmstools/R/intervalTacsat.r index 4439647..cdd2830 100644 --- a/vmstools/R/intervalTacsat.r +++ b/vmstools/R/intervalTacsat.r @@ -1,72 +1,109 @@ -intervalTacsat <- function(tacsat,level="trip",weight=c(1,0),fill.na=FALSE){ - - if(length(weight) != 2) stop("weight must be specified as a length 2 numeric vector") - weight <- weight / sum(weight,na.rm=TRUE) - - #- First sort the tacsat dataset - tacsat <- sortTacsat(tacsat) - - #- Add date-time stamp - if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - - #- If a trip level is specified, the interval rate can be calculated by trip (to make sure that no long - # interval rates between trips occur in comparison to by level="vessel" - if(level=="trip"){ - if(is.null(tacsat$FT_REF)==TRUE) stop("no tripnumber available to merge on trip level") - sptacsat <- split(tacsat,tacsat$VE_REF) - tacsat$INTV <- unlist(lapply(sptacsat,function(x){ - - FT_REF <- as.factor(x$FT_REF); - res <- by(x,FT_REF, - function(y){ - if(nrow(y)>1){ - difftime_xmin1 <- c(NA,difftime(y$SI_DATIM[2:nrow(y)],y$SI_DATIM[1:(nrow(y)-1)],units="mins")) - difftime_xplus1 <- c(difftime_xmin1[-1],NA) - if(any(weight == 0)){ - if(weight[1] == 0) INTV <- difftime_xplus1 - if(weight[2] == 0) INTV <- difftime_xmin1 - } else { INTV <- rowSums(cbind(difftime_xmin1*weight[1],difftime_xplus1*weight[2])) - } - #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different - # 
weight setting. - if(fill.na==TRUE){ - idx <- which(is.na(INTV)==TRUE) - INTV[idx] <- rowSums(cbind(difftime_xmin1[idx],difftime_xplus1[idx]),na.rm=TRUE) - INTV[idx][which(INTV[idx]==0)]<- NA - } - - return(INTV) - } else { - return(NA) - } - }) - return(unsplit(res,FT_REF))})) - tacsat$INTV[which(tacsat$FT_REF == "0")] <- NA - } - #- If no trip level is specified, the other option is to calculate interval rate by vessel - if(level=="vessel"){ - difftime_xmin1 <- c(NA,difftime(tacsat$SI_DATIM[2:nrow(tacsat)],tacsat$SI_DATIM[1:(nrow(tacsat)-1)],units="mins")) - difftime_xplus1 <- c(difftime_xmin1[-1],NA) - if(any(weight == 0)){ - if(weight[1] == 0) INTV <- difftime_xplus1 - if(weight[2] == 0) INTV <- difftime_xmin1 - } else { INTV <- rowSums(cbind(difftime_xmin1*weight[1],difftime_xplus1*weight[2])) - } - #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different - # weight setting. - if(fill.na==TRUE){ - idx <- which(is.na(INTV)==TRUE) - INTV[idx] <- rowSums(cbind(difftime_xmin1[idx],difftime_xplus1[idx]),na.rm=TRUE) - INTV[idx][which(INTV[idx]==0)]<- NA - } - tacsat$INTV <- INTV - - vessels <- unique(tacsat$VE_REF) - first.vessels <- unlist(lapply(as.list(vessels),function(x){which(tacsat$VE_REF==x)[1]})) - last.vessels <- unlist(lapply(as.list(vessels),function(x){rev(which(tacsat$VE_REF==x))[1]})) - if(weight[1] != 0) tacsat$INTV[first.vessels] <- NA - if(weight[2] != 0) tacsat$INTV[last.vessels] <- NA - if(fill.na==TRUE) tacsat$INTV[first.vessels] <- difftime_xplus1[first.vessels] - if(fill.na==TRUE) tacsat$INTV[last.vessels] <- difftime_xmin1[last.vessels] - } - return(tacsat)} \ No newline at end of file +#' Return the interval time between pings +#' +#' Return the interval time between pings of one vessel or at the trip level. +#' Interval is calculated based on ping x and ping x-1 or ping x and ping x+1. +#' +#' Note that the DEFAULT interval given is the difference between ping x and +#' ping x-1. 
Hence, the first ping of a vessel or trip does NOT have an +#' interval rate and will display NA. +#' +#' With weight you can specify if the interval rate is used between ping x and +#' ping x-1 (weight = c(1,0)), if the interval rate is used between ping x and +#' ping x+1 (weight = c(0,1)) or an intermediate weight (weight = c(0.4,0.6) / +#' equal weight = c(0.5,0.5)). +#' +#' @param tacsat tacsat dataset +#' @param level level to get interval rate at: trip or vessel +#' @param weight weight to apply to calculation of mean interval rate towards +#' and away from ping +#' @param fill.na If interval rate cannot be calculated based on default or +#' provided weight, take closest alternative to provide an interval rate +#' @return The original tacsat file is returned including a column: INTV which +#' holds the interval rate in minutes +#' @author Niels T. Hintzen +#' @seealso \code{\link{interpolateTacsat}},\code{\link{calculateSpeed}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' result <- intervalTacsat(tacsat[1:100,],level="vessel") +#' result <- intervalTacsat(tacsat[1:100,],level="vessel",weight=c(2,1),fill.na=TRUE) +#' +#' data(eflalo) +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' result <- intervalTacsat(tacsatp[1:100,],level="trip",weight=c(1,1),fill.na=FALSE) +#' +#' +#' @export intervalTacsat +intervalTacsat <- function(tacsat,level="trip",weight=c(1,0),fill.na=FALSE){ + + if(length(weight) != 2) stop("weight must be specified as a length 2 numeric vector") + weight <- weight / sum(weight,na.rm=TRUE) + + #- First sort the tacsat dataset + tacsat <- sortTacsat(tacsat) + + #- Add date-time stamp + if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + + #- If a trip level is specified, the interval rate can be calculated by trip (to make sure that no long + # interval rates between trips occur in comparison to by level="vessel" + 
if(level=="trip"){ + if(is.null(tacsat$FT_REF)==TRUE) stop("no tripnumber available to merge on trip level") + sptacsat <- split(tacsat,tacsat$VE_REF) + tacsat$INTV <- unlist(lapply(sptacsat,function(x){ + + FT_REF <- as.factor(x$FT_REF); + res <- by(x,FT_REF, + function(y){ + if(nrow(y)>1){ + difftime_xmin1 <- c(NA,difftime(y$SI_DATIM[2:nrow(y)],y$SI_DATIM[1:(nrow(y)-1)],units="mins")) + difftime_xplus1 <- c(difftime_xmin1[-1],NA) + if(any(weight == 0)){ + if(weight[1] == 0) INTV <- difftime_xplus1 + if(weight[2] == 0) INTV <- difftime_xmin1 + } else { INTV <- rowSums(cbind(difftime_xmin1*weight[1],difftime_xplus1*weight[2])) + } + #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different + # weight setting. + if(fill.na==TRUE){ + idx <- which(is.na(INTV)==TRUE) + INTV[idx] <- rowSums(cbind(difftime_xmin1[idx],difftime_xplus1[idx]),na.rm=TRUE) + INTV[idx][which(INTV[idx]==0)]<- NA + } + + return(INTV) + } else { + return(NA) + } + }) + return(unsplit(res,FT_REF))})) + tacsat$INTV[which(tacsat$FT_REF == "0")] <- NA + } + #- If no trip level is specified, the other option is to calculate interval rate by vessel + if(level=="vessel"){ + difftime_xmin1 <- c(NA,difftime(tacsat$SI_DATIM[2:nrow(tacsat)],tacsat$SI_DATIM[1:(nrow(tacsat)-1)],units="mins")) + difftime_xplus1 <- c(difftime_xmin1[-1],NA) + if(any(weight == 0)){ + if(weight[1] == 0) INTV <- difftime_xplus1 + if(weight[2] == 0) INTV <- difftime_xmin1 + } else { INTV <- rowSums(cbind(difftime_xmin1*weight[1],difftime_xplus1*weight[2])) + } + #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different + # weight setting. 
+ if(fill.na==TRUE){ + idx <- which(is.na(INTV)==TRUE) + INTV[idx] <- rowSums(cbind(difftime_xmin1[idx],difftime_xplus1[idx]),na.rm=TRUE) + INTV[idx][which(INTV[idx]==0)]<- NA + } + tacsat$INTV <- INTV + + vessels <- unique(tacsat$VE_REF) + first.vessels <- unlist(lapply(as.list(vessels),function(x){which(tacsat$VE_REF==x)[1]})) + last.vessels <- unlist(lapply(as.list(vessels),function(x){rev(which(tacsat$VE_REF==x))[1]})) + if(weight[1] != 0) tacsat$INTV[first.vessels] <- NA + if(weight[2] != 0) tacsat$INTV[last.vessels] <- NA + if(fill.na==TRUE) tacsat$INTV[first.vessels] <- difftime_xplus1[first.vessels] + if(fill.na==TRUE) tacsat$INTV[last.vessels] <- difftime_xmin1[last.vessels] + } + return(tacsat)} diff --git a/vmstools/R/kgeur.r b/vmstools/R/kgeur.r index 10e93e2..3382f9d 100644 --- a/vmstools/R/kgeur.r +++ b/vmstools/R/kgeur.r @@ -1 +1,17 @@ -kgeur <- function(x){return(c(grep("KG",x),grep("EURO",x)))} \ No newline at end of file +#' Return kg and euro column index of eflalo dataset +#' +#' Returns the index of the columns with kg and euro information from a given +#' eflalo dataset +#' +#' +#' @param x Colnames of eflalo dataset (or any other dataset with column names +#' as LE_KG_ and LE_EURO_) +#' @author Niels T. Hintzen +#' @references EU Lot 2 project +#' @examples +#' +#' data(eflalo) +#' kgeur(colnames(eflalo)) +#' +#' @export kgeur +kgeur <- function(x){return(c(grep("KG",x),grep("EURO",x)))} diff --git a/vmstools/R/km2Degree.R b/vmstools/R/km2Degree.R index 615a646..53924ea 100644 --- a/vmstools/R/km2Degree.R +++ b/vmstools/R/km2Degree.R @@ -1,12 +1,35 @@ -`km2Degree` <- -function(lon,lat,km){ - x1 <- lon - y1 <- lat - - a <- cos(y1*pi/180)*cos(y1*pi/180)*sin((1*pi/180)/2)*sin((1*pi/180)/2); - c <- 2*atan2(sqrt(a),sqrt(1-a)); - R <- 6371; - dx1 <- R*c - - return(km / dx1)} - +#' Compute distance from kilometers into degrees +#' +#' Function transformes the distance expressed in kilometers into degrees. 
This is +#' based on the GPS location of a point. +#' +#' +#' @param lon Longitude of the GPS position +#' @param lat Latitude of the GPS position +#' @param km Value in Km to turn into degrees +#' @note Computation of degrees is approximation based on the Haversine formula +#' @author Niels T. Hintzen +#' @seealso \code{\link{distance}}, \code{\link{degree2Km}}, +#' \code{\link{lonLatRatio}} +#' @references EU lot 2 project +#' @examples +#' +#' lon <- -4 +#' lat <- 50 +#' km <- 114.4897 +#' +#' km2Degree(lon,lat,km) #1.601833 +#' +#' @export km2Degree +`km2Degree` <- +function(lon,lat,km){ + x1 <- lon + y1 <- lat + + a <- cos(y1*pi/180)*cos(y1*pi/180)*sin((1*pi/180)/2)*sin((1*pi/180)/2); + c <- 2*atan2(sqrt(a),sqrt(1-a)); + R <- 6371; + dx1 <- R*c + + return(km / dx1)} + diff --git a/vmstools/R/labellingHauls.r b/vmstools/R/labellingHauls.r index e32682a..b335748 100644 --- a/vmstools/R/labellingHauls.r +++ b/vmstools/R/labellingHauls.r @@ -1,36 +1,59 @@ - -# assign an identifier in'HL_ID' to each of the fishing sequences -# (based on SI_STATE, assuming the "h", "f", "s" coding) -# (useful to count them in a grid...) -labellingHauls <- function(tacsat){ - tacsat$SI_STATE2 <- tacsat$SI_STATE - tacsat$SI_STATE <- as.character(tacsat$SI_STATE) - tacsat[is.na(tacsat$SI_STATE), 'SI_STATE'] <- '1' # assign steaming - tacsat[tacsat$SI_STATE!='f', 'SI_STATE'] <- '1' # assign steaming - tacsat[tacsat$SI_STATE=='f', 'SI_STATE'] <- '2' # assign fishing - tacsat$SI_STATE <- as.numeric(tacsat$SI_STATE) - tacsat$HL_ID <- c(0, diff(tacsat$SI_STATE)) # init - tacsat$HL_ID <- cumsum(tacsat$HL_ID) # fishing sequences detected. - tacsat$SS_ID <- 1- tacsat$HL_ID # steaming sequences detected. - tacsat$HL_ID <- cumsum(tacsat$SS_ID ) # fishing sequences labelled. 
- tacsat[tacsat$SI_STATE==1, 'HL_ID'] <- 0 # correct label 0 for steaming - tacsat$HL_ID <- factor(tacsat$HL_ID) - levels(tacsat$HL_ID) <- 0: (length(levels(tacsat$HL_ID))-1) # rename the id for increasing numbers from 0 - tacsat$HL_ID <- as.character(tacsat$HL_ID) - # then assign a unique id - idx <- tacsat$HL_ID!=0 - tacsat[idx, "HL_ID"] <- paste( - tacsat$VE_REF[idx], "_", - tacsat$LE_GEAR[idx], "_", - tacsat$HL_ID[idx], - sep="") - tacsat$SI_STATE <- tacsat$SI_STATE2 - tacsat <- tacsat[, !colnames(tacsat) %in% c('SS_ID', 'SI_STATE2')] # remove useless column - return(tacsat) - } - -# label fishing sequecnes with a unique identifier (method based on SI_STATE) -#tacsat <- labellingHauls(tacsat) - - - +# assign an identifier in'HL_ID' to each of the fishing sequences +# (based on SI_STATE, assuming the "h", "f", "s" coding) +# (useful to count them in a grid...) + + +#' Labelling fishing operations +#' +#' Labelling fishing sequences from the SI_STATE coding with an unique +#' identifier +#' +#' +#' @param tacsat a tacsat format with the SI_STATE coding informed for "h", "f" +#' and "s" +#' @return add a new column to the tacsat data.frame named HL_ID +#' @author F. 
Bastardie +#' @seealso \code{\link{activityTacsat}} +#' @examples +#' +#' data(tacsat) +#' tacsat$SI_STATE <- 0 +#' tacsat$SI_SP <- replace(tacsat$SI_SP, is.na(tacsat$SI_SP), 0) +#' tacsat[tacsat$SI_SP >= 1.5 & tacsat$SI_SP <= 7.5,'SI_STATE'] <- 'f' +#' tacsat[tacsat$SI_SP < 1.5,'SI_STATE'] <- 'h' +#' tacsat[tacsat$SI_SP > 7.5,'SI_STATE'] <- 's' +#' labellingHauls(tacsat) +#' +#' @export labellingHauls +labellingHauls <- function(tacsat){ + tacsat$SI_STATE2 <- tacsat$SI_STATE + tacsat$SI_STATE <- as.character(tacsat$SI_STATE) + tacsat[is.na(tacsat$SI_STATE), 'SI_STATE'] <- '1' # assign steaming + tacsat[tacsat$SI_STATE!='f', 'SI_STATE'] <- '1' # assign steaming + tacsat[tacsat$SI_STATE=='f', 'SI_STATE'] <- '2' # assign fishing + tacsat$SI_STATE <- as.numeric(tacsat$SI_STATE) + tacsat$HL_ID <- c(0, diff(tacsat$SI_STATE)) # init + tacsat$HL_ID <- cumsum(tacsat$HL_ID) # fishing sequences detected. + tacsat$SS_ID <- 1- tacsat$HL_ID # steaming sequences detected. + tacsat$HL_ID <- cumsum(tacsat$SS_ID ) # fishing sequences labelled. 
+ tacsat[tacsat$SI_STATE==1, 'HL_ID'] <- 0 # correct label 0 for steaming + tacsat$HL_ID <- factor(tacsat$HL_ID) + levels(tacsat$HL_ID) <- 0: (length(levels(tacsat$HL_ID))-1) # rename the id for increasing numbers from 0 + tacsat$HL_ID <- as.character(tacsat$HL_ID) + # then assign a unique id + idx <- tacsat$HL_ID!=0 + tacsat[idx, "HL_ID"] <- paste( + tacsat$VE_REF[idx], "_", + tacsat$LE_GEAR[idx], "_", + tacsat$HL_ID[idx], + sep="") + tacsat$SI_STATE <- tacsat$SI_STATE2 + tacsat <- tacsat[, !colnames(tacsat) %in% c('SS_ID', 'SI_STATE2')] # remove useless column + return(tacsat) + } + +# label fishing sequecnes with a unique identifier (method based on SI_STATE) +#tacsat <- labellingHauls(tacsat) + + + diff --git a/vmstools/R/landingsMaps2GIFanim.r b/vmstools/R/landingsMaps2GIFanim.r index 216f726..daf4217 100644 --- a/vmstools/R/landingsMaps2GIFanim.r +++ b/vmstools/R/landingsMaps2GIFanim.r @@ -1,31 +1,74 @@ - # author F.Bastardie - # automatic creation of .gif animation using library(animation) - # need to install the ImageMagik freeware -landingsMaps2GIFanim <- function(idir = file.path("C:","VMSanalysis","FemernBelt","jpegLandings"), - spp=c("COD","SPR","HER","PLE","FLE","DAB","WHB")){ -require(animation) -for(sp in spp){ - for(met in list.files(file.path(idir, sp))){ - for(what in c("weight","value")){ - setwd(file.path(idir, sp, met,what,"quarter")) - filename <- paste("map_landings_",what,"_merged_vessels_",sp,"_",met,"_",sep='') - cat(paste(filename,"\n")) - wildcard = paste(filename, "*.", "jpeg", sep = "") - im.convert(wildcard, output = file.path(getwd(), paste("anim",".gif", sep='')), # library(animation) - convert = "convert", - cmd.fun = system, clean = FALSE) - } # end what - } # end met - } # end sp -return() -} - - # pings2LandingMaps (all.merged=all.merged, sp="LE_EURO_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), - # cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, # fehmarn Belt area - # breaks0= c(0,100, 
100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000) - # ) - # pings2LandingMaps (all.merged=all.merged, sp="LE_KG_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), - # cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, # fehmarn Belt area - # breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000) - # ) - # landingsMaps2GIFanim(idir = file.path("C:","VMSanalysis","FemernBelt","jpegLandings"), spp=c("COD","SPR") ) \ No newline at end of file + # author F.Bastardie + # automatic creation of .gif animation using library(animation) + # need to install the ImageMagick freeware + + +#' generate some gif animations from the landings maps +#' +#' generate some gif animations from the landings maps after having generated +#' the landings maps from the merged table per year, per metier (level6), per +#' metier-quarter all together +#' +#' Using some facilities provided by the 'animation' R package. Need ImageMagick +#' installed. +#' +#' @param idir the full path of the jpegLandings folder +#' @param spp vector of FAO codes for species +#' @author Francois Bastardie +#' @examples +#' +#' +#' \dontrun{ +#' +#' +#' for(a.year in as.character(2005:2009)){ +#' +#' # ...and load the merged output table for all vessels +#' load(file.path("C:","output",paste("all_merged__",a.year,".RData",sep=''))) +#' +#' # generate the maps +#' pings2LandingsMaps (all.merged=all.merged, sp="LE_EURO_COD", +#' output= file.path("C:","VMSanalysis", "FemernBelt"), +#' cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, +#' breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4), +#' 100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000)) +#' } +#' +#' # create some animations... 
+#' # (need the 'animation' R package and ImageMagik installed) +#' landingsMaps2GIFanim(idir = file.path("C:","VMSanalysis","FemernBelt", +#' "jpegLandings"), spp=c("COD") ) +#' +#' +#' } +#' +#' +#' @export landingsMaps2GIFanim +landingsMaps2GIFanim <- function(idir = file.path("C:","VMSanalysis","FemernBelt","jpegLandings"), + spp=c("COD","SPR","HER","PLE","FLE","DAB","WHB")){ +require(animation) +for(sp in spp){ + for(met in list.files(file.path(idir, sp))){ + for(what in c("weight","value")){ + setwd(file.path(idir, sp, met,what,"quarter")) + filename <- paste("map_landings_",what,"_merged_vessels_",sp,"_",met,"_",sep='') + cat(paste(filename,"\n")) + wildcard = paste(filename, "*.", "jpeg", sep = "") + im.convert(wildcard, output = file.path(getwd(), paste("anim",".gif", sep='')), # library(animation) + convert = "convert", + cmd.fun = system, clean = FALSE) + } # end what + } # end met + } # end sp +return() +} + + # pings2LandingMaps (all.merged=all.merged, sp="LE_EURO_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), + # cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, # fehmarn Belt area + # breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000) + # ) + # pings2LandingMaps (all.merged=all.merged, sp="LE_KG_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), + # cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, # fehmarn Belt area + # breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000) + # ) + # landingsMaps2GIFanim(idir = file.path("C:","VMSanalysis","FemernBelt","jpegLandings"), spp=c("COD","SPR") ) diff --git a/vmstools/R/lonLat2SpatialPolygons.r b/vmstools/R/lonLat2SpatialPolygons.r index f60ac4a..24d1c98 100644 --- a/vmstools/R/lonLat2SpatialPolygons.r +++ b/vmstools/R/lonLat2SpatialPolygons.r @@ -1,95 +1,124 @@ -lonLat2SpatialPolygons <- 
function(SI_LONG=NULL,SI_LATI=NULL,lst=NULL){ - if((is.null(SI_LONG)==TRUE | is.null(SI_LATI)==TRUE) & is.null(lst)==TRUE) stop("Specify either longitude and latitude vectors or a list containing a dataframe with specified longitude and latitude vectors") - - #- Return spatial polygons object when SI_LONG and SI_LATI vectors are supplied - if(is.null(SI_LONG)==FALSE | is.null(SI_LATI)==FALSE){ - if(length(SI_LONG) != length(SI_LATI)) stop("Length SI_LONG not equal to length SI_LATI") - brks <- which(is.na(SI_LONG)==TRUE | is.na(SI_LATI) == TRUE) - storePols <- list() - #- If no breaks - if(length(brks)==0){ - if((SI_LONG[1] != SI_LONG[length(SI_LONG)]) | (SI_LATI[1] != SI_LATI[length(SI_LATI)])){ - coords <- coordinates(rbind(cbind(SI_LONG,SI_LATI), - cbind(SI_LONG[1],SI_LATI[1]))) - } else { - coords <- coordinates(cbind(SI_LONG,SI_LATI)) - } - pol <- Polygon(coords) - pols <- Polygons(list(pol),ID="1") - storePols[[1]] <- pols - } else { - #- If breaks - for(i in 1:length(brks)){ - if(i == 1){ - if((SI_LONG[1] != SI_LONG[(brks[i]-1)]) | (SI_LATI[1] != SI_LATI[(brks[i]-1)])){ - coords <- coordinates(rbind(cbind(SI_LONG[1:(brks[i]-1)],SI_LATI[1:(brks[i]-1)]), - cbind(SI_LONG[1],SI_LATI[1]))) - } else { - coords <- coordinates(cbind(SI_LONG[1:(brks[i]-1)],SI_LATI[1:(brks[i]-1)])) - } - } else { - if((SI_LONG[(brks[i-1]+1)] != SI_LONG[(brks[i]-1)]) | (SI_LATI[(brks[i-1]+1)] != SI_LATI[(brks[i]-1)])){ - coords <- coordinates(rbind(cbind(SI_LONG[(brks[i-1]+1):(brks[i]-1)],SI_LATI[(brks[i-1]+1):(brks[i]-1)]), - cbind(SI_LONG[(brks[i-1]+1)],SI_LATI[(brks[i-1]+1)]))) - } else { - coords <- coordinates(cbind(SI_LONG[(brks[i-1]+1):(brks[i]-1)],SI_LATI[(brks[i-1]+1):(brks[i]-1)])) - } - } - pol <- Polygon(coords) - pols <- Polygons(list(pol),ID=i) - storePols[[i]] <- pols - } - } - spol <- SpatialPolygons(storePols) - } - #- Return spatial polygons object when list with SI_LONG,SI_LATI dataframe is given - if(is.null(lst)==FALSE){ - if(is.null(names(lst))==TRUE) names(lst) 
<- 1:length(lst) - storePols <- list() - counter <- 1 - for(j in 1:length(lst)){ - brks <- which(is.na(lst[[j]]$SI_LONG)==TRUE | is.na(lst[[j]]$SI_LATI) == TRUE) - #- If no breaks - if(length(brks)==0){ - if((lst[[j]]$SI_LONG[1] != lst[[j]]$SI_LONG[nrow(lst[[j]])]) | (lst[[j]]$SI_LATI[1] != lst[[j]]$SI_LATI[nrow(lst[[j]])])){ - coords <- coordinates(rbind(cbind(lst[[j]]$SI_LONG,lst[[j]]$SI_LATI), - cbind(lst[[j]]$SI_LONG[1],lst[[j]]$SI_LATI[1]))) - } else { - coords <- coordinates(cbind(lst[[j]]$SI_LONG,lst[[j]]$SI_LATI)) - } - pol <- Polygon(coords) - pols <- Polygons(list(pol),ID=counter) - storePols[[counter]] <- pols - counter <- counter+1 - } else { - #- If breaks - if(length(which(diff(brks)==1)>0)) stop("Only 1 row of NA is allowed in between values of SI_LONG and SI_LATI") - for(i in 1:length(brks)){ - if(i == 1){ - if((lst[[j]]$SI_LONG[1] != lst[[j]]$SI_LONG[(brks[i]-1)]) | (lst[[j]]$SI_LATI[1] != lst[[j]]$SI_LATI[(brks[i]-1)])){ - coords <- coordinates(rbind(cbind(lst[[j]]$SI_LONG[1:(brks[i]-1)],lst[[j]]$SI_LATI[1:(brks[i]-1)]), - cbind(lst[[j]]$SI_LONG[1],lst[[j]]$SI_LATI[1]))) - } else { - coords <- coordinates(cbind(lst[[j]]$SI_LONG[1:(brks[i]-1)],lst[[j]]$SI_LATI[1:(brks[i]-1)])) - } - } else { - if((lst[[j]]$SI_LONG[(brks[i-1]+1)] != lst[[j]]$SI_LONG[(brks[i]-1)]) | (lst[[j]]$SI_LATI[(brks[i-1]+1)] != lst[[j]]$SI_LATI[(brks[i]-1)])){ - coords <- coordinates(rbind(cbind(lst[[j]]$SI_LONG[(brks[i-1]+1):(brks[i]-1)],lst[[j]]$SI_LATI[(brks[i-1]+1):(brks[i]-1)]), - cbind(lst[[j]]$SI_LONG[(brks[i-1]+1)],lst[[j]]$SI_LATI[(brks[i-1]+1)]))) - } else { - coords <- coordinates(cbind(lst[[j]]$SI_LONG[(brks[i-1]+1):(brks[i]-1)],lst[[j]]$SI_LATI[(brks[i-1]+1):(brks[i]-1)])) - } - } - pol <- Polygon(coords) - pols <- Polygons(list(pol),ID=counter) - storePols[[counter]] <- pols - counter <- counter + 1 - } - } - } - #- Create spatial polygons - spol <- SpatialPolygons(storePols) - } - return(spol)} - - \ No newline at end of file +#' Creates a 'SpatialPolygons' 
object from longitude-latitude dataset +#' +#' Takes one set of longitude-latitude or a list of lon-lat objects and +#' converts it into one SpatialPolygons object. +#' +#' If the longitude and latitude set do not form a closed polygon, the function +#' will close it for you. +#' +#' @param SI_LONG Set with longitude values +#' @param SI_LATI Set with latitude values +#' @param lst list of objects, each with longitude and latitude values +#' @return Returns a 'SpatialPolygons' set from the package 'sp' +#' @note May take a while when many polygons need to be created +#' @author Niels T. Hintzen +#' @seealso \code{\link{SpatialPolygons}} +#' @examples +#' +#' data(europa) +#' +#' eurPols <- lonLat2SpatialPolygons(lst=lapply(as.list(sort(unique(europa$SID))), +#' function(x){data.frame( +#' SI_LONG=subset(europa,SID==x)$X, +#' SI_LATI=subset(europa,SID==x)$Y)})) +#' +#' area <- lonLat2SpatialPolygons(SI_LONG=c(2,2.5,2.7,2.1),SI_LATI=c(54,54.2,55.8,55.6)) +#' plot(eurPols,col="green",xlim=c(-4,10),ylim=c(48,62)); axis(1);axis(2);box() +#' plot(area,add=TRUE,col="red") +#' +#' @export lonLat2SpatialPolygons +lonLat2SpatialPolygons <- function(SI_LONG=NULL,SI_LATI=NULL,lst=NULL){ + if((is.null(SI_LONG)==TRUE | is.null(SI_LATI)==TRUE) & is.null(lst)==TRUE) stop("Specify either longitude and latitude vectors or a list containing a dataframe with specified longitude and latitude vectors") + + #- Return spatial polygons object when SI_LONG and SI_LATI vectors are supplied + if(is.null(SI_LONG)==FALSE | is.null(SI_LATI)==FALSE){ + if(length(SI_LONG) != length(SI_LATI)) stop("Length SI_LONG not equal to length SI_LATI") + brks <- which(is.na(SI_LONG)==TRUE | is.na(SI_LATI) == TRUE) + storePols <- list() + #- If no breaks + if(length(brks)==0){ + if((SI_LONG[1] != SI_LONG[length(SI_LONG)]) | (SI_LATI[1] != SI_LATI[length(SI_LATI)])){ + coords <- coordinates(rbind(cbind(SI_LONG,SI_LATI), + cbind(SI_LONG[1],SI_LATI[1]))) + } else { + coords <- coordinates(cbind(SI_LONG,SI_LATI)) + } 
+ pol <- Polygon(coords) + pols <- Polygons(list(pol),ID="1") + storePols[[1]] <- pols + } else { + #- If breaks + for(i in 1:length(brks)){ + if(i == 1){ + if((SI_LONG[1] != SI_LONG[(brks[i]-1)]) | (SI_LATI[1] != SI_LATI[(brks[i]-1)])){ + coords <- coordinates(rbind(cbind(SI_LONG[1:(brks[i]-1)],SI_LATI[1:(brks[i]-1)]), + cbind(SI_LONG[1],SI_LATI[1]))) + } else { + coords <- coordinates(cbind(SI_LONG[1:(brks[i]-1)],SI_LATI[1:(brks[i]-1)])) + } + } else { + if((SI_LONG[(brks[i-1]+1)] != SI_LONG[(brks[i]-1)]) | (SI_LATI[(brks[i-1]+1)] != SI_LATI[(brks[i]-1)])){ + coords <- coordinates(rbind(cbind(SI_LONG[(brks[i-1]+1):(brks[i]-1)],SI_LATI[(brks[i-1]+1):(brks[i]-1)]), + cbind(SI_LONG[(brks[i-1]+1)],SI_LATI[(brks[i-1]+1)]))) + } else { + coords <- coordinates(cbind(SI_LONG[(brks[i-1]+1):(brks[i]-1)],SI_LATI[(brks[i-1]+1):(brks[i]-1)])) + } + } + pol <- Polygon(coords) + pols <- Polygons(list(pol),ID=i) + storePols[[i]] <- pols + } + } + spol <- SpatialPolygons(storePols) + } + #- Return spatial polygons object when list with SI_LONG,SI_LATI dataframe is given + if(is.null(lst)==FALSE){ + if(is.null(names(lst))==TRUE) names(lst) <- 1:length(lst) + storePols <- list() + counter <- 1 + for(j in 1:length(lst)){ + brks <- which(is.na(lst[[j]]$SI_LONG)==TRUE | is.na(lst[[j]]$SI_LATI) == TRUE) + #- If no breaks + if(length(brks)==0){ + if((lst[[j]]$SI_LONG[1] != lst[[j]]$SI_LONG[nrow(lst[[j]])]) | (lst[[j]]$SI_LATI[1] != lst[[j]]$SI_LATI[nrow(lst[[j]])])){ + coords <- coordinates(rbind(cbind(lst[[j]]$SI_LONG,lst[[j]]$SI_LATI), + cbind(lst[[j]]$SI_LONG[1],lst[[j]]$SI_LATI[1]))) + } else { + coords <- coordinates(cbind(lst[[j]]$SI_LONG,lst[[j]]$SI_LATI)) + } + pol <- Polygon(coords) + pols <- Polygons(list(pol),ID=counter) + storePols[[counter]] <- pols + counter <- counter+1 + } else { + #- If breaks + if(length(which(diff(brks)==1)>0)) stop("Only 1 row of NA is allowed in between values of SI_LONG and SI_LATI") + for(i in 1:length(brks)){ + if(i == 1){ + 
if((lst[[j]]$SI_LONG[1] != lst[[j]]$SI_LONG[(brks[i]-1)]) | (lst[[j]]$SI_LATI[1] != lst[[j]]$SI_LATI[(brks[i]-1)])){ + coords <- coordinates(rbind(cbind(lst[[j]]$SI_LONG[1:(brks[i]-1)],lst[[j]]$SI_LATI[1:(brks[i]-1)]), + cbind(lst[[j]]$SI_LONG[1],lst[[j]]$SI_LATI[1]))) + } else { + coords <- coordinates(cbind(lst[[j]]$SI_LONG[1:(brks[i]-1)],lst[[j]]$SI_LATI[1:(brks[i]-1)])) + } + } else { + if((lst[[j]]$SI_LONG[(brks[i-1]+1)] != lst[[j]]$SI_LONG[(brks[i]-1)]) | (lst[[j]]$SI_LATI[(brks[i-1]+1)] != lst[[j]]$SI_LATI[(brks[i]-1)])){ + coords <- coordinates(rbind(cbind(lst[[j]]$SI_LONG[(brks[i-1]+1):(brks[i]-1)],lst[[j]]$SI_LATI[(brks[i-1]+1):(brks[i]-1)]), + cbind(lst[[j]]$SI_LONG[(brks[i-1]+1)],lst[[j]]$SI_LATI[(brks[i-1]+1)]))) + } else { + coords <- coordinates(cbind(lst[[j]]$SI_LONG[(brks[i-1]+1):(brks[i]-1)],lst[[j]]$SI_LATI[(brks[i-1]+1):(brks[i]-1)])) + } + } + pol <- Polygon(coords) + pols <- Polygons(list(pol),ID=counter) + storePols[[counter]] <- pols + counter <- counter + 1 + } + } + } + #- Create spatial polygons + spol <- SpatialPolygons(storePols) + } + return(spol)} + + diff --git a/vmstools/R/lonLatRatio.r b/vmstools/R/lonLatRatio.r index 26322d5..7949d10 100644 --- a/vmstools/R/lonLatRatio.r +++ b/vmstools/R/lonLatRatio.r @@ -1,10 +1,35 @@ -`lonLatRatio` <- - function(x1,lat){ - #Based on the Haversine formula - #At the position, the y-position remains the same, hence, cos(lat)*cos(lat) instead of cos(lat) * cos(y2) - a <- cos(lat*pi/180)*cos(lat*pi/180)*sin((0.1*pi/180)/2)*sin((0.1*pi/180)/2); - c <- 2*atan2(sqrt(a),sqrt(1-a)); - R <- 6371; - dx1 <- R*c - - return(c(dx1/11.12))} +#' Calculate the ratio between 1 degree in longitude versus 1 degree in +#' latitude +#' +#' The distance in Km on the longitude direction changes along the latitude +#' direction. This function computes the ratio between 1 degree in the +#' longitude direction depending on the latitude of the GPS position. 
Returns +#' the ratios of two GPS locations (two succeeding VMS datapoints). Can be +#' used with 1 GPS position too, returning NA for the second value. +#' +#' +#' @param lon Longitude of the two GPS positions +#' @param lat Latitude of the two GPS positions +#' @note Computation is approximation based on the Haversine formula +#' @author Niels T. Hintzen +#' @seealso \code{\link{distance}}, \code{\link{degree2Km}}, +#' \code{\link{km2Degree}} +#' @references EU lot 2 project +#' @examples +#' +#' lon <- -4 +#' lat <- 50 +#' +#' lonLatRatio(lon,lat) +#' +#' @export lonLatRatio +`lonLatRatio` <- + function(x1,lat){ + #Based on the Haversine formula + #At the position, the y-position remains the same, hence, cos(lat)*cos(lat) instead of cos(lat) * cos(y2) + a <- cos(lat*pi/180)*cos(lat*pi/180)*sin((0.1*pi/180)/2)*sin((0.1*pi/180)/2); + c <- 2*atan2(sqrt(a),sqrt(1-a)); + R <- 6371; + dx1 <- R*c + + return(c(dx1/11.12))} diff --git a/vmstools/R/mapGrid.r b/vmstools/R/mapGrid.r index b95e0bf..e813a14 100644 --- a/vmstools/R/mapGrid.r +++ b/vmstools/R/mapGrid.r @@ -1,123 +1,173 @@ -#mapGrid.r -#andy south 12/2/09 - -#to map grids input as SGDF -#** OR maybe just provide a wrapper to mapGriddedData instead ? -#would need to add a better worldmap into that - -mapGrid <- function( sGDF - , sPDF - , we="" - , ea="" - , so="" - , no="" - , gridValName="fishing" - , plotTitle = "" - , numCats = 5 - , paletteCats = "heat.colors" - , addLegend = TRUE - , legendx='bottomleft' - , legendncol = 1 - , legendtitle = "fishing activity" - , plotPoints = FALSE - , colPoints =1 - , legPoints = FALSE - , colLand = 'sienna' - , addICESgrid = FALSE - , addScale = TRUE - , outGridFile = "" #name for output gridAscii - , outPlot = "" #name for output png - , ... ) -{ - -require(sp) -require(maptools) - -par(mar=c(4,6,1,1)) - -xlim0=c(we,ea) -ylim0=c(so,no) - -lstargs <- list(...) 
- -#dev.new() -if(length(lstargs$breaks0)==0) { - breaks0 <- pretty(sGDF[[gridValName]],n=numCats) - } else{ - breaks0 <- lstargs$breaks0 - } - -# rainbow, heat.colors, etc. -cols <- rev(do.call(paletteCats, list(length(breaks0)-1))) - -require(mapdata) -#windows(8,7) # what is this for ? -map("worldHires", add=FALSE,col=colLand,fill=TRUE, bg="white", xlim=xlim0 + c(+0.1,-0.1), ylim=ylim0 + c(+0.1,-0.1), -regions=c('uk','ireland','france','germany','netherlands', 'norway','belgium', -'spain','luxembourg','denmark', 'sweden','iceland', 'portugal','italy','sicily','ussr','sardinia','albania','monaco','turkey','austria', -'switzerland','czechoslovakia','finland','libya', 'hungary','yugoslavia','poland','greece','romania','bulgaria', 'slovakia','morocco', -'tunisia','algeria','egypt' ), mar=c(2,6,2,2)) - - im <- as.image.SpatialGridDataFrame(sGDF, attr=2) - image(im$x,im$y,im$z, axes=FALSE, col=cols, breaks = breaks0, - xlim=xlim0 , ylim=ylim0, add=TRUE ) - -#add ICES rectangles -if(addICESgrid){ - for(i in seq(-15,50, by=1)) abline(v=i) - for(i in seq(0, 75, by=0.5)) abline(h=i) - } - -map("worldHires", add=TRUE, col=colLand, fill=TRUE, bg="white", xlim=xlim0 , ylim=ylim0 , -regions=c('uk','ireland','france','germany','netherlands', 'norway','belgium', -'spain','luxembourg','denmark', 'sweden','iceland', 'portugal','italy','sicily','ussr','sardinia','albania','monaco','turkey','austria', -'switzerland','czechoslovakia','finland','libya', 'hungary','yugoslavia','poland','greece','romania','bulgaria', 'slovakia','morocco', -'tunisia','algeria','egypt' )) - - -box() # to put a box around the plot -#mtext(paste(gear,year),font=4,line=-1.5) -axis(1) -axis(2, las=2) - if(we>0){ - mtext("Degree East", side=1, line=2) - } else{ - mtext("Degree West", side=1, line=2) - } - if(no>0){ - mtext("Degree North", side=2, line=3) - } else{ - mtext("Degree South", side=2, line=3) - } - - - -# add a scale -if(addScale) map.scale(x=xlim0[2]-(xlim0[2]-xlim0[1])/2, y=ylim0[1], ratio=FALSE) 
- -#to add points (can obscure grid) -if (plotPoints) { - if(length(colPoints)!=1) { - colPoints <- factor(colPoints) - a.legPoints <- levels(colPoints) - levels(colPoints) <- colors()[(1:length(levels(colPoints))) *10] - points(sPDF, pch=16, col=as.character(colPoints),cex=0.5) - - if(length(legPoints)!=0){ - legend(x='bottomright', legend=a.legPoints, pch = 16, col=levels(colPoints), title="", ncol=2, bg="white", pt.cex=1) - } - }else{ - points(sPDF, pch='.', col=colPoints,cex=0.1) - } - } - -#legend(x='bottomleft', legend=breaks[1:(length(breaks)-1)], pch = 22, pt.bg=cols, title="fishing activity",bg="white",pt.cex=2 ) -legend(x=legendx, legend=breaks0[1:(length(breaks0)-1)], pch = 22, pt.bg=cols, title=legendtitle, ncol=legendncol, bg="white",pt.cex=2 ) - - -#to add plotTitle -if (plotTitle != "") mtext(plotTitle) - - - -} #end of mapGrid \ No newline at end of file +#mapGrid.r +#andy south 12/2/09 + +#to map grids input as SGDF +#** OR maybe just provide a wrapper to mapGriddedData instead ? +#would need to add a better worldmap into that + + + +#' function to map grids +#' +#' Accepts an input of a \code{SpatialGridDataFrame} Plots a map of the grid +#' and optionally outputs to a gridAscii file and/or an image. 
+#' +#' +#' @param sGDF a \code{SpatialGridDataFrame} +#' @param sPDF an optional \code{SpatialPointsDataFrame} plotted if +#' \code{plotPoints=TRUE} +#' @param we western bounds of the area to plot, if not specified taken from +#' the \code{sGDF} +#' @param ea eastern bounds of the area to plot, if not specified taken from +#' the \code{sGDF} +#' @param so southern bounds of the area to plot, if not specified taken from +#' the \code{sGDF} +#' @param no northern bounds of the area to plot, if not specified taken from +#' the \code{sGDF} +#' @param gridValName the name of the attribute column to plot from the +#' \code{SpatialGridDataFrame} +#' @param plotTitle optional title to add to the plot +#' @param numCats how many categories to classify grid values into for map plot +#' (uses \code{pretty()} classification) +#' @param paletteCats color palette to use +#' @param addLegend whether to add a legend to the plot +#' @param legendx position of legend should be one of 'bottomright', 'bottom', +#' 'bottomleft', 'left', 'topleft', 'top', 'topright', 'right', 'center' +#' @param legendncol number of columns in the legend +#' @param legendtitle legend title +#' @param plotPoints whether to add the original points to the plot +#' @param colPoints color of points to plot +#' @param legPoints Logical. Points in legend +#' @param colLand color of land +#' @param addICESgrid Logical. Adding ICES grid on top +#' @param addScale Logical. 
Adding axes +#' @param outGridFile optional name for a gridAscii file to be created from the +#' grid +#' @param outPlot optional name for a png file to be created from the plot +#' @param \dots NOT used yet +#' @author Andy South +#' @seealso \code{vmsGridCreate()} +#' @references EU Lot 2 project +#' @examples +#' +#' #mapGrid(dF, nameLon = "POS_LONGITUDE", nameLat = "POS_LATITUDE", +#' # cellsizeX = 0.5, cellsizeY = 0.5,legendx='bottomright', +#' # plotPoints=TRUE ) +#' +#' @export mapGrid +mapGrid <- function( sGDF + , sPDF + , we="" + , ea="" + , so="" + , no="" + , gridValName="fishing" + , plotTitle = "" + , numCats = 5 + , paletteCats = "heat.colors" + , addLegend = TRUE + , legendx='bottomleft' + , legendncol = 1 + , legendtitle = "fishing activity" + , plotPoints = FALSE + , colPoints =1 + , legPoints = FALSE + , colLand = 'sienna' + , addICESgrid = FALSE + , addScale = TRUE + , outGridFile = "" #name for output gridAscii + , outPlot = "" #name for output png + , ... ) +{ + +require(sp) +require(maptools) + +par(mar=c(4,6,1,1)) + +xlim0=c(we,ea) +ylim0=c(so,no) + +lstargs <- list(...) + +#dev.new() +if(length(lstargs$breaks0)==0) { + breaks0 <- pretty(sGDF[[gridValName]],n=numCats) + } else{ + breaks0 <- lstargs$breaks0 + } + +# rainbow, heat.colors, etc. +cols <- rev(do.call(paletteCats, list(length(breaks0)-1))) + +require(mapdata) +#windows(8,7) # what is this for ? 
+map("worldHires", add=FALSE,col=colLand,fill=TRUE, bg="white", xlim=xlim0 + c(+0.1,-0.1), ylim=ylim0 + c(+0.1,-0.1), +regions=c('uk','ireland','france','germany','netherlands', 'norway','belgium', +'spain','luxembourg','denmark', 'sweden','iceland', 'portugal','italy','sicily','ussr','sardinia','albania','monaco','turkey','austria', +'switzerland','czechoslovakia','finland','libya', 'hungary','yugoslavia','poland','greece','romania','bulgaria', 'slovakia','morocco', +'tunisia','algeria','egypt' ), mar=c(2,6,2,2)) + + im <- as.image.SpatialGridDataFrame(sGDF, attr=2) + image(im$x,im$y,im$z, axes=FALSE, col=cols, breaks = breaks0, + xlim=xlim0 , ylim=ylim0, add=TRUE ) + +#add ICES rectangles +if(addICESgrid){ + for(i in seq(-15,50, by=1)) abline(v=i) + for(i in seq(0, 75, by=0.5)) abline(h=i) + } + +map("worldHires", add=TRUE, col=colLand, fill=TRUE, bg="white", xlim=xlim0 , ylim=ylim0 , +regions=c('uk','ireland','france','germany','netherlands', 'norway','belgium', +'spain','luxembourg','denmark', 'sweden','iceland', 'portugal','italy','sicily','ussr','sardinia','albania','monaco','turkey','austria', +'switzerland','czechoslovakia','finland','libya', 'hungary','yugoslavia','poland','greece','romania','bulgaria', 'slovakia','morocco', +'tunisia','algeria','egypt' )) + + +box() # to put a box around the plot +#mtext(paste(gear,year),font=4,line=-1.5) +axis(1) +axis(2, las=2) + if(we>0){ + mtext("Degree East", side=1, line=2) + } else{ + mtext("Degree West", side=1, line=2) + } + if(no>0){ + mtext("Degree North", side=2, line=3) + } else{ + mtext("Degree South", side=2, line=3) + } + + + +# add a scale +if(addScale) map.scale(x=xlim0[2]-(xlim0[2]-xlim0[1])/2, y=ylim0[1], ratio=FALSE) + +#to add points (can obscure grid) +if (plotPoints) { + if(length(colPoints)!=1) { + colPoints <- factor(colPoints) + a.legPoints <- levels(colPoints) + levels(colPoints) <- colors()[(1:length(levels(colPoints))) *10] + points(sPDF, pch=16, col=as.character(colPoints),cex=0.5) + + 
if(length(legPoints)!=0){ + legend(x='bottomright', legend=a.legPoints, pch = 16, col=levels(colPoints), title="", ncol=2, bg="white", pt.cex=1) + } + }else{ + points(sPDF, pch='.', col=colPoints,cex=0.1) + } + } + +#legend(x='bottomleft', legend=breaks[1:(length(breaks)-1)], pch = 22, pt.bg=cols, title="fishing activity",bg="white",pt.cex=2 ) +legend(x=legendx, legend=breaks0[1:(length(breaks0)-1)], pch = 22, pt.bg=cols, title=legendtitle, ncol=legendncol, bg="white",pt.cex=2 ) + + +#to add plotTitle +if (plotTitle != "") mtext(plotTitle) + + + +} #end of mapGrid diff --git a/vmstools/R/maxRangeCI.r b/vmstools/R/maxRangeCI.r index 7e29653..c2722e1 100644 --- a/vmstools/R/maxRangeCI.r +++ b/vmstools/R/maxRangeCI.r @@ -1,53 +1,99 @@ - - -maxRangeCI <- function(x,y,Time,speed){ - - #Pre-Calculation to speed up the code - pi180 <- pi/180 - cosy1 <- cos(y[1]*pi180) - cosy2 <- cos(y[2]*pi180) - - #Calculate maximum distance in km - dmax <- Time/60*sum(speed,na.rm=TRUE)/2*1.852 - - #Calculate d from Haversine function - d <- distance(x[1],y[1],x[2],y[2]) - - #Calculate a and b as in Mills et al. 
2006 paper - warn<- 0 - if(d >= dmax){ - warning(paste("Distance too far to reach with current speed estimate ",round(x,3)," ",round(y,3),"\n")) - dmax <- d - warn <- 1 - } - a <- dmax/2 - b <- sqrt((dmax^2 - d^2)/4) - - if(d == 0){ - o <- 0 - } else { - dx <- (x[2] - x[1])*pi180 - dy <- (y[2] - y[1])*pi180 - o <- atan2(sin(dx)*cosy2,cosy1*sin(y[2]*pi180)-sin(y[1]*pi180)*cosy2*cos(dx)) - angles <- (o*(180/pi)) %% 360 - - angle2 <- ifelse(angles >= 0 & angles < 180, 90 - angles,270-angles) - o <- angle2*(pi180) - } - #See also: http://www.movable-type.co.uk/scripts/latlong.html - Bx <- cosy2*cos((x[2]-x[1])*pi180) - By <- cosy2*sin((x[2]-x[1])*pi180) - mid.x <- (x[1]*pi180) + atan2(By,cosy1+Bx) - mid.y <- atan2(sin(y[1]*pi180) + sin(y[2]*pi180),sqrt((cosy1+Bx)^2 + By^2)) - mid.x <- mid.x*180/pi - mid.y <- mid.y*180/pi - - a <- c(km2Degree(mid.x,mid.y,a),a/111.2) - b <- c(km2Degree(mid.x,mid.y,b),b/111.2) - - #See also Pfoser and Jensen 1999 Capturing the Uncertainty of Moving-Object representation - u <- 0:360*pi180 - xres <- mid.x + a[1] * cos(o) * cos(u) - b[1] * sin(o) * sin(u) - yres <- mid.y + a[2] * sin(o) * cos(u) + b[2] * cos(o) * sin(u) - - return(list(matrix(c(xres,yres),ncol=2),dmax,warn))} \ No newline at end of file +#' Computation of outer range of vessel trajectory between two succeeding +#' points +#' +#' A vessel can, based on an assumed speed and time interval, only travel a +#' certain maximum range between two succeeding VMS / GPS points. This outer +#' range can be describes as an ellipse surrounding these two points. The outer +#' region returned is a matrix of 360 x-y coordinates. As well, the maximum +#' distance able to travel depending on the speed and time interval is returned +#' as is a warning if the measured distance between the two succeeding points +#' is larger than the possible distance able to travel in the amount of time +#' and speed given, indicating that the vessel must have travelled faster than +#' the given speeds. 
+#' +#' A list is returned with in the first element the 360 x-y coordinates of the +#' outer region. In the second element, the maximum distance able to travel +#' given the speed and time interval, is returned. The third element holds a +#' warning if the maximum distance computed is exceeded. If warn equals 0, the +#' maximum distance is not exceeded. If warn equals 1, this distance is +#' exceeded. +#' +#' @param x Longitudes of the GPS positions +#' @param y Latitudes of the GPS positions +#' @param Time Time in minutes between the succeeding datapoints +#' @param speed Speeds at the GPS positions in nots +#' @note Used within the plotCIinterpolation() function. +#' @author Niels T. Hintzen +#' @seealso \code{\link{plotCIinterpolation}}, \code{\link{interpolateTacsat}}, +#' \code{\link{N1p0}}, \code{\link{plotInterpolation}} +#' @references Pfoser and Jensen 1999 Capturing the Uncertainty of +#' Moving-Object Representations, Mills et al. 2006 Estimating high resolution +#' trawl fishing effort from satellite-based vessel monitoring system data, +#' Hintzen et al. 
2010 Improved estimation of trawling tracks using cubic +#' Hermite spline interpolationof position registration data +#' @examples +#' +#' data(tacsat) +#' tacsat <- formatTacsat(tacsat) +#' #shorten the tacsat file for example use and add date-time combination +#' tacsat <- tacsat[1:10,] +#' lon <- tacsat$SI_LONG[c(3,4)] +#' lat <- tacsat$SI_LATI[c(3,4)] +#' #Calculate maximum range to be used within Confidence Interval calculation +#' timeDiff <- sum(an(unlist(strsplit(tacsat$SI_TIME[4],":"))) * c(60,1,1/60)) +#' - sum(an(unlist(strsplit(tacsat$SI_TIME[3],":"))) * c(60,1,1/60)) +#' res <- maxRangeCI(lon,lat,timeDiff,tacsat$SI_SP[c(3,4)]) +#' +#' +#' @export maxRangeCI +maxRangeCI <- function(x,y,Time,speed){ + + #Pre-Calculation to speed up the code + pi180 <- pi/180 + cosy1 <- cos(y[1]*pi180) + cosy2 <- cos(y[2]*pi180) + + #Calculate maximum distance in km + dmax <- Time/60*sum(speed,na.rm=TRUE)/2*1.852 + + #Calculate d from Haversine function + d <- distance(x[1],y[1],x[2],y[2]) + + #Calculate a and b as in Mills et al. 
2006 paper + warn<- 0 + if(d >= dmax){ + warning(paste("Distance too far to reach with current speed estimate ",round(x,3)," ",round(y,3),"\n")) + dmax <- d + warn <- 1 + } + a <- dmax/2 + b <- sqrt((dmax^2 - d^2)/4) + + if(d == 0){ + o <- 0 + } else { + dx <- (x[2] - x[1])*pi180 + dy <- (y[2] - y[1])*pi180 + o <- atan2(sin(dx)*cosy2,cosy1*sin(y[2]*pi180)-sin(y[1]*pi180)*cosy2*cos(dx)) + angles <- (o*(180/pi)) %% 360 + + angle2 <- ifelse(angles >= 0 & angles < 180, 90 - angles,270-angles) + o <- angle2*(pi180) + } + #See also: http://www.movable-type.co.uk/scripts/latlong.html + Bx <- cosy2*cos((x[2]-x[1])*pi180) + By <- cosy2*sin((x[2]-x[1])*pi180) + mid.x <- (x[1]*pi180) + atan2(By,cosy1+Bx) + mid.y <- atan2(sin(y[1]*pi180) + sin(y[2]*pi180),sqrt((cosy1+Bx)^2 + By^2)) + mid.x <- mid.x*180/pi + mid.y <- mid.y*180/pi + + a <- c(km2Degree(mid.x,mid.y,a),a/111.2) + b <- c(km2Degree(mid.x,mid.y,b),b/111.2) + + #See also Pfoser and Jensen 1999 Capturing the Uncertainty of Moving-Object representation + u <- 0:360*pi180 + xres <- mid.x + a[1] * cos(o) * cos(u) - b[1] * sin(o) * sin(u) + yres <- mid.y + a[2] * sin(o) * cos(u) + b[2] * cos(o) * sin(u) + + return(list(matrix(c(xres,yres),ncol=2),dmax,warn))} diff --git a/vmstools/R/mcp.area.r b/vmstools/R/mcp.area.r index 2d48bab..90b02ce 100644 --- a/vmstools/R/mcp.area.r +++ b/vmstools/R/mcp.area.r @@ -1,44 +1,58 @@ -#This is taken from gpclib package -mcp.area <- function(xy, id, percent = seq(20, 100, by = 5), - unin = c("m","km"), unout = c("ha", "km2", "m2"), plotit = TRUE){ - unin <- match.arg(unin) - unout <- match.arg(unout) - if (length(id) != nrow(xy)) - stop("xy and id should be of the same length") - xy <- xy[!is.na(xy[, 1]), ] - xy <- xy[!is.na(xy[, 2]), ] - id <- id[!is.na(xy[, 1])] - id <- id[!is.na(xy[, 2])] - lev <- percent - res <- list() - ar <- matrix(0, nrow = length(lev), ncol = nlevels(factor(id))) - lixy <- split(xy, id) - le <- names(lixy) - for (i in 1:length(lev)) { - ar[i, ] <- 
unlist(lapply(lixy, function(z) { - res <- mcp(z, rep(1, nrow(z)), percent = lev[i]) - class(res) <- "data.frame" - return(area.poly(as(res[, 2:3], "gpc.poly"))) - })) - } - ar <- as.data.frame(ar) - names(ar) <- le - if (unin == "m") { - if (unout == "ha") - ar <- ar/10000 - if (unout == "km2") - ar <- ar/1e+06 - } - if (unin == "km") { - if (unout == "ha") - ar <- ar * 100 - if (unout == "m2") - ar <- ar * 1e+06 - } - row.names(ar) <- lev - class(ar) <- c("hrsize", "data.frame") - attr(ar, "units") <- unout - if (plotit) - plot(ar) - return(ar) -} +#This is taken from gpclib package + + +#' Hidden function to calculate Minimum Convex Polygon +#' +#' +#' +#' @param xy +#' @param id +#' @param percent +#' @param unin +#' @param unout +#' @param plotit +#' @author adehabitat +#' @export mcp.area +mcp.area <- function(xy, id, percent = seq(20, 100, by = 5), + unin = c("m","km"), unout = c("ha", "km2", "m2"), plotit = TRUE){ + unin <- match.arg(unin) + unout <- match.arg(unout) + if (length(id) != nrow(xy)) + stop("xy and id should be of the same length") + xy <- xy[!is.na(xy[, 1]), ] + xy <- xy[!is.na(xy[, 2]), ] + id <- id[!is.na(xy[, 1])] + id <- id[!is.na(xy[, 2])] + lev <- percent + res <- list() + ar <- matrix(0, nrow = length(lev), ncol = nlevels(factor(id))) + lixy <- split(xy, id) + le <- names(lixy) + for (i in 1:length(lev)) { + ar[i, ] <- unlist(lapply(lixy, function(z) { + res <- mcp(z, rep(1, nrow(z)), percent = lev[i]) + class(res) <- "data.frame" + return(area.poly(as(res[, 2:3], "gpc.poly"))) + })) + } + ar <- as.data.frame(ar) + names(ar) <- le + if (unin == "m") { + if (unout == "ha") + ar <- ar/10000 + if (unout == "km2") + ar <- ar/1e+06 + } + if (unin == "km") { + if (unout == "ha") + ar <- ar * 100 + if (unout == "m2") + ar <- ar * 1e+06 + } + row.names(ar) <- lev + class(ar) <- c("hrsize", "data.frame") + attr(ar, "units") <- unout + if (plotit) + plot(ar) + return(ar) +} diff --git a/vmstools/R/mergeEflalo2Pings.r 
b/vmstools/R/mergeEflalo2Pings.r index f1c2f96..6e0ca7e 100644 --- a/vmstools/R/mergeEflalo2Pings.r +++ b/vmstools/R/mergeEflalo2Pings.r @@ -1,1079 +1,1299 @@ - -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -# A 'R' ROUTINE FOR THE COUPLING OF VMS AND LOGBOOKS -# WP4 - Lot2 EU tender VMS/LOGBOOKS COUPLING -# author: Francois Bastardie (DTU- Aqua; fba@aqua.dtu.dk) -# January 2010 -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# - - - - - - -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!MERGE LOGBOOKS WITH VMS PER 
VESSEL!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - - -#!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!# -mergeEflalo2Pings <- - function(eflalo, tacsat, vessels=unique(eflalo$VE_REF), general=list(output.path=file.path("C:"), - visual.check=TRUE, detectFishing=FALSE, speed="segment", what.speed="calculated", conserve.all=TRUE, - ), ...){ - - lstargs <- as.list( sys.call() ) # equivalent to lstargs <- list(...) but suppress the r cmd build warning? 
- - # create required folders for outputs - cat("if it still doesn't exist, 'results' folder is created in ",general$output.path,"\n") - dir.create(general$output.path, showWarnings = TRUE, recursive = TRUE, mode = "0777") - - - #!!!!!!!!!!!!!!!!!!!!!# - #!!!!!!!!!!!!!!!!!!!!!# - # utils-- - collapse.all.columns <- function (obj, columns= seq(ncol(obj)) ){ - eval(parse(text=paste('paste(obj[,', paste(columns,collapse='] ,"#", obj[,'), '],sep=\'\')', sep=''))) } - uncollapse.column <- function(obj, column="coll"){ - dd<- strsplit(as.character(obj[,column]),"#") ; nco <- length(dd[[1]]) ; dd<- unlist(dd) - res <- eval(parse(text=paste('data.frame(',paste('dd[seq(',1:nco,',nrow(obj)*nco,by=nco)]', collapse=','),')'))) - colnames(res) <- paste("col",1:nco,sep='') - return(res) - } - - #utils-- - # FUNCTION TO CREATE A SPATIAL GRID - # 'xx' have a 'SI_LATI' and a 'SI_LONG' columns - assignPointsToSpatialGrid <- function(xx){ - - xx <- xx[,!colnames(xx) %in% c("icessquare","icessquare.vms") ] # remove - xx <- cbind.data.frame(xx, icessquare= rep(0,nrow(xx))) - - - rlong <- range(anf(xx$SI_LONG),na.rm=TRUE) - vect.long <- signif(seq(floor(rlong[1]), ceiling(rlong[2]), by=1),4) # long (x) - label.long <- rep(paste(rep(LETTERS,each=10),0:9,sep=""),each=1) - names(label.long) <- signif(seq(-50, 209, by=1),4) # long (x) - label.long <- label.long[!is.na(names(label.long))] # => correspondance long (-50 to 209) / sq letter (A0 to Z9) - label.long <- label.long[as.character(vect.long)] - rlat <- range(anf(xx$SI_LATI), na.rm=TRUE) - vect.lat <- signif(seq(floor(rlat[1]), ceiling(rlat[2]),by=0.5),4) # lat (y) - label.lat <- rep(paste(seq(1,75,1)),each=1) - names(label.lat) <- paste(signif(seq(36,73, by=0.5),4)) - label.lat <- label.lat[!is.na(names(label.lat))] # => correspondance lat (36 to 73) / sq number (1 to 75) - label.lat <- label.lat[as.character(vect.lat)] - vect.label <- paste(rep(label.lat,each=length(label.long)),"",label.long,sep="") - xx[,"SI_RECT"] <- 
paste(label.lat [findInterval(anf(xx[,"SI_LATI"]), vect.lat)] , label.long [findInterval(anf(xx[,"SI_LONG"]), vect.long)], sep="") - - return(xx) - } - - - #!!!!!!!!!!!!!!!!!!!!!# - #utils-- - # for managing NA on logbook side - # (from vms trip.sq without corresponding logbook trip.sq e.g. because no declaration in sq because only steaming time inside) - # we need to inform back the specificity of the vessel from logbook using info from the same trip i.e. vesselid+FT_REF - retrieveOnBkSide <- function(merged, type.data){ - idx <- which(merged$LE_MET_level6=="NA") - merged.NA <- merged[idx,] # input (only the trip.sq with NA for the logbook part) - - for (td in type.data){ - map <- tapply(merged[, td ], paste(merged$VE_REF, merged$FT_REF), - function(i) {ss<- unique(as.character(i)) ; ss[ss!="NA"][1]}) - merged.NA[, td ] <- factor(paste(merged.NA$VE_REF,merged.NA$FT_REF)) - levels(merged.NA[, td ]) <- map[levels(merged.NA[, td ])] - } - if(nrow(merged.NA)>0) merged.NA$flag <- 4 # flag on meth - merged[idx,] <- merged.NA # output - return(merged) - } - - #!#!##!#!##!#!##!#!##!#!##!#!# - #!#!##!#!##!#!##!#!##!#!##!#!# - #!#!##!#!##!#!##!#!##!#!##!#!# - #!#!##!#!##!#!##!#!##!#!##!#!# - #!#!##!#!##!#!##!#!##!#!##!#!# - all.vesselid <- as.character(unique(eflalo[anf(eflalo$VE_LEN)>=0,]$VE_REF)) - all.vesselid <- all.vesselid[!is.na(all.vesselid)] # e.g. when VE_LEN at NA exists - if(length(vessels)!=0) all.vesselid <- vessels - # => IF ARG INFORMED, THEN KEEP ONLY ONE OR SEVERAL VESSELS AS NEEDED.... 
- - for(a.vesselid in all.vesselid){ # PER VESSEL - cat(paste(a.vesselid,"\n", sep="" )) - - #---------- - #---------- - #---------- - #---------- - #---------- - #---------- - # LOGBOOK INPUT - logbk.this.vessel <- eflalo[eflalo$VE_REF %in% a.vesselid,] - logbk.this.vessel$LE_RECT <- factor(logbk.this.vessel$LE_RECT) - logbk.this.vessel$VE_REF <- factor(logbk.this.vessel$VE_REF) - logbk.this.vessel$VE_FLT <- factor(logbk.this.vessel$VE_FLT) - - - # automatic detection of a.year - general$a.year <- format(strptime( paste(logbk.this.vessel$FT_DDAT[1]) , tz='GMT', "%e/%m/%Y" ), "%Y") - - # departure time - logbk.this.vessel$LE_DTIME <- as.POSIXct( paste(logbk.this.vessel$FT_DDAT, logbk.this.vessel$FT_DTIME) , - tz='GMT', "%e/%m/%Y %H:%M" ) - # arrival time - logbk.this.vessel$LE_LTIME <- as.POSIXct( paste(logbk.this.vessel$FT_LDAT, logbk.this.vessel$FT_LTIME) , - tz='GMT', "%e/%m/%Y %H:%M" ) - # catch.date - logbk.this.vessel$LE_CTIME <- as.POSIXct( paste(logbk.this.vessel$LE_CDAT) , tz='GMT', "%e/%m/%Y" ) - - # mid time bk trips - LE_MIDTIME <- rep(NA, nrow(logbk.this.vessel)) - dep <- logbk.this.vessel$LE_DTIME +10 # we artificially add +10min because bug in R if mid-time is 00:00:00 - arr <- logbk.this.vessel$LE_LTIME +1 - for(r in 1:length(dep)){ - LE_MIDTIME[r] <- as.character(seq(from=dep[r], to=arr[r], length.out = 3)[2]) - } - logbk.this.vessel$LE_MIDTIME <- LE_MIDTIME - - if(!"FT_REF" %in% colnames(logbk.this.vessel) ) { - logbk.this.vessel$FT_REF <- factor(LE_MIDTIME) # init - levels(logbk.this.vessel$FT_REF) <- 1:length(logbk.this.vessel$FT_REF) # assign a FT_REF code - } # only if FT_REF is actually not already informed - - - - - #=> LOGBOOK (EFLALO) INPUT REQUIRES AT LEAST, - # 'VE_REF', FT_DDAT, FT_DTIME, FT_LDAT, FT_LTIME, FT_CDAT, - # 'LE_SP_KG' (etc.), 'LE_RECT', 'VE_FLT' AND 'LE_MET_level6', 'LE_GEAR' COLUMNS - # - - #---------- - #---------- - #---------- - #---------- - #---------- - #---------- - # VMS INPUT: load traj with 'at sea' pings 
SI_STATE informed - # ABSOLUTELY REQUIRED: c("VE_REF","SI_LATI","SI_LONG", "SI_DATE", "SI_TIME", "SI_FT", "SI_HARB", "SI_STATE") - - - if(a.vesselid %in% unique(tacsat$VE_REF)){ - - tacsat.this.vessel <- tacsat[tacsat$VE_REF == a.vesselid,] # subset for this vessel - tacsat.this.vessel$VE_REF <- factor(tacsat.this.vessel$VE_REF) - - - # if does not exist, add SI_DATIM for handling the time in R - tacsat.this.vessel$SI_TIME <- as.character(tacsat.this.vessel$SI_TIME) - tacsat.this.vessel[tacsat.this.vessel$SI_TIME=="24:00", "SI_TIME"] <- "00:00" # debug - tacsat.this.vessel[tacsat.this.vessel$SI_DATE=="29/02/1801", "SI_DATE"] <- "01/03/1801" # debug - tacsat.this.vessel$SI_DATIM <- as.POSIXct( paste(tacsat.this.vessel$SI_DATE, tacsat.this.vessel$SI_TIME) , - tz='GMT', "%d/%m/%Y %H:%M" ) - tacsat.this.vessel <- tacsat.this.vessel[!is.na(tacsat.this.vessel$SI_DATIM),] # debug e.g. when 29/02/1801 - - - # keep only the essential - vms.this.vessel <- tacsat.this.vessel [, c("VE_REF","SI_LATI","SI_LONG", - "SI_DATIM","SI_FT", "SI_SP", "SI_HE", "SI_HARB", "SI_STATE")] - rm(tacsat.this.vessel); gc(reset=TRUE) - vms.this.vessel$VE_REF <- factor(vms.this.vessel$VE_REF) - - vms.this.vessel$idx <- 1:nrow(vms.this.vessel) # label for each ping - - - - - # filter if vessel with a bad vms - to.remove.because.deficient.vms <- any(is.na(vms.this.vessel$SI_FT)) - to.remove.because.not.enough.vms.trips <- length(unique(vms.this.vessel$SI_FT))< 2 # nb vms trips < 2 - if(length(unique(vms.this.vessel$SI_FT))<2) warning('need more than 1 trip in SI_FT') - - # filter if vessel with a bad logbook - to.remove.because.pble.lgbk <- length(unique(logbk.this.vessel$FT_REF))< 2 # nb logbk trips < 2 - - # then... 
- a.flag <- to.remove.because.deficient.vms || to.remove.because.not.enough.vms.trips || to.remove.because.pble.lgbk - - ## remove FT_REF and SI_MIDTIME if it exists - vms.this.vessel <- vms.this.vessel[, !colnames(vms.this.vessel) %in% c("FT_REF", "SI_MIDTIME")] - - - - if(a.flag==FALSE) { # i.e. vms-equipped - - if(all(is.na(vms.this.vessel$SI_STATE)) && general$detectFishing==FALSE) - stop('the SI_STATE column has to be informed before making the merging') - if(all(is.na(logbk.this.vessel$VE_FLT))) - stop('the VE_FLT column has to be informed before making the merging') - - # alias - .logbk <- logbk.this.vessel - .vms <- vms.this.vessel - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - #! DO THE LINK - APPROACH 1 #!#!!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - NIELS <- FALSE - if(NIELS){ - eftim <- .logbk[which(duplicated(.logbk$FT_REF)==FALSE),c("LE_DTIME","LE_LTIME","FT_REF")] - dtime <- eftim[,1] - ltime <- eftim[,2] - stime <- .vms$SI_DATIM - tripn <- eftim[,3] - - - - smdtime <- t(outer(stime,dtime,"-")) - gtltime <- outer(ltime,stime,"-") - - #-Find first point where tacsat time is greater or equal to departure time and smaller than arrival time - st <- apply(smdtime,1,function(x){which(x>=0)[1]}) - en <- apply(gtltime,1,function(x){rev(which(x>=0))[1]}) - - #-Make sure that values are within the interval of departure and arrival time - subse <- which(is.na(st <= en) == FALSE & (st <= en) == TRUE) - - st <- st[subse] - en <- en[subse] - - #-Assign Tacsat data with FT_REF from Eflalo2 dataset where they link - - if(length(st)!=1){ - - idx <- unlist(mapply(seq,st,en,SIMPLIFY=FALSE)) - reps <- unlist(lapply(mapply(seq,st,en,SIMPLIFY=FALSE),length)) - .vms$FT_REF <- 0 - .vms$FT_REF[idx] <- rep(tripn[subse],reps) - } - if(length(st)==1){ - .vms$FT_REF <- 0 - .vms$FT_REF[seq(st,en)] <- rep(tripn[subse],length(seq(st,en))) - } - if(length(st)==0){ - - .vms$FT_REF <- 0 - } - - - } 
# end NIELS - - FRANCOIS <- TRUE - if(FRANCOIS){ - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - #! DO THE LINK - APPROACH 2 #!#!!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - - #!!!!!!!!!!!!!!!!!!# - #!!!!!!!!!!!!!!!!!!# - # -If IT DOES NOT EXIST YET-, - # FIND THE MID-TIME OF VMS TRIPS - if(any(colnames(.vms)%in%"SI_DATIM")){ - if(!any(colnames(.vms)%in%"SI_DTIME")){ - # find and add the first point of each trip - .vms$start.trip <- c(1,diff(.vms[,"SI_FT"])) - .vms$end.trip <- c(diff(.vms[,"SI_FT"]), .vms[nrow(.vms),"SI_FT"]) - .vms[.vms$start.trip>0, "start.trip"] <- .vms[.vms$start.trip>0, "SI_FT"] - .vms[.vms$end.trip>0, "end.trip"] <- .vms[.vms$end.trip>0, "SI_FT"] - - tmp <- .vms[.vms$start.trip>0,] - tmp <- tmp[,c("VE_REF","SI_DATIM","SI_FT")] - tmp2 <- .vms[.vms$end.trip>0,] - tmp2 <- tmp2[,c("VE_REF","SI_DATIM","SI_FT")] - .vms <- .vms[,!colnames(.vms) %in% c("start.trip", "end.trip")] # remove tool columns - table.midtime <- merge(tmp, tmp2, by.x="SI_FT", by.y="SI_FT") - table.midtime <- table.midtime[, c("SI_FT","VE_REF.x","SI_DATIM.x","SI_DATIM.y") ] - colnames(table.midtime) <- c("SI_FT","VE_REF","SI_DTIME","SI_ATIME") - } else{ - table.midtime <- .vms[, c("SI_FT","VE_REF","SI_DTIME","SI_ATIME") ] - table.midtime <- table.midtime[!duplicated(data.frame(table.midtime$SI_FT, table.midtime$VE_REF)),] - } - } else{stop("no 'SI_DATIM' found in vms")} - SI_MIDTIME <- rep(0, nrow(table.midtime)) - - for(r in 1: nrow(table.midtime)){ - SI_MIDTIME[r] <- as.character(seq(from=table.midtime$SI_DTIME[r], to=table.midtime$SI_ATIME[r], length.out = 3)[2]) - - } - table.midtime$SI_MIDTIME <- SI_MIDTIME - if(!any(colnames(.vms)%in%"SI_MIDTIME")){ # here we are... 
- .vms <- merge(.vms, table.midtime[,c("SI_FT","SI_MIDTIME")], by.x="SI_FT", by.y="SI_FT") - } - - - - #!!!!!!!!!!!!!!!!!!# - #!!!!!!!!!!!!!!!!!!# - # ASSIGN A 'BK.TRIPNUM' FROM LOGBOOK TO EACH VMS TRIP - trunk <-1 # trunk give the part of the year to be plotted (1 to 5) - # visual check - if(general$visual.check){ - windows(width=8, height=4) - ltrunk <- (nrow(table.midtime)/5) - idxtrunk <- (trunk+(trunk-1)*ltrunk):(trunk*ltrunk) - # plot(table.midtime$SI_DTIME[idxtrunk],rep(1,length(table.midtime$SI_DTIME[idxtrunk])), - plot(table.midtime$SI_DTIME, rep(1,length(table.midtime$SI_DTIME)), - ylim=c(0,0.52), type="n", ylab="", axes=FALSE) - r <- as.POSIXct(round(range(table.midtime$SI_DTIME), "days")) - axis.POSIXct(1, at=seq(r[1], r[2], by="month"), format="%e%b%y:%H:%M") - axis(2, at=c(0.5,0.1),labels=c("VMS","LOGBOOK")) - - for(i in 1:nrow(table.midtime)) { - segments(as.POSIXct(table.midtime$SI_DTIME[i], tz='GMT'), 0.5, as.POSIXct(table.midtime$SI_ATIME[i], tz='GMT'), 0.5, col=1) - points(as.POSIXct(table.midtime$SI_MIDTIME[i], tz='GMT'), 0.5, col=1) - text(as.POSIXct(table.midtime$SI_MIDTIME[i], tz='GMT'), 0.52, table.midtime$SI_FT[i], cex=0.5, col=1) - - } - - tmp <- .logbk[, c("LE_DTIME","LE_LTIME", "LE_MIDTIME", "FT_REF")] - tmp <- tmp[!duplicated(tmp$LE_MIDTIME), ] - for(i in 1:nrow(tmp)){ - segments(as.POSIXct(tmp$LE_DTIME[i], tz='GMT'), 0.1, as.POSIXct(tmp$LE_LTIME[i], tz='GMT'), 0.1, col=1) - points(as.POSIXct(tmp$LE_MIDTIME[i], tz='GMT'), 0.1, col=1) - text(as.POSIXct(tmp$LE_MIDTIME[i], tz='GMT'), 0.0785, tmp$FT_REF[i], cex=0.5, col=1) - } - } - - - # THE CORE CODE: compare bk$LE_MIDTIME and vms$SI_MIDTIME - # find the nearest bk$LE_MIDTIME for each vms$SI_MIDTIME - # and then change levels - # (so, for each mid.time in vms, a FT_REF will be find) - # (so, no lines in vms without a FT_REF from bk...) 
- fa1 <- levels(factor(.vms$SI_MIDTIME)) - new.levels <- fa1 - fa2 <- levels(factor(.logbk$LE_MIDTIME)) - for(i in 1:length(fa1)) { # for each level in vms - tmp <- abs(as.numeric( as.POSIXct(fa2, tz='GMT') - as.POSIXct(fa1, tz='GMT')[i] )) - if(all(is.na(tmp))) tmp <- abs(as.numeric( as.Date(fa2) - as.Date(fa1)[i] )) # debug the R bug in case of mid-time at 00:00 hour - new.levels[i] <- fa2 [which(tmp == min(tmp, na.rm=TRUE) )] # find the nearest level in logbook - } - .vms$SI_MIDTIME <- factor(as.character(.vms$SI_MIDTIME)) - sauv <- .vms$SI_MIDTIME - levels(.vms$SI_MIDTIME) <- new.levels # and change mid.time in vms to force the merging - - # finally, replace levels by the FT_REF - tmp <- .logbk[.logbk$LE_MIDTIME %in% .vms$SI_MIDTIME , c("FT_REF","LE_MIDTIME")] - tmp2 <- tmp[!duplicated(tmp$FT_REF),] - idx <- match( levels(.vms$SI_MIDTIME), tmp2$LE_MIDTIME ) - .vms$FT_REF <- .vms$SI_MIDTIME # init - levels(.vms$FT_REF) <- as.character(tmp2$FT_REF ) [idx] - - - if(general$visual.check){ - for(i in 1: nrow(.vms)) { - arrows(as.POSIXct( sauv[i]), 0.5 ,as.POSIXct( .vms$SI_MIDTIME[i], tz='GMT'),0.1, length=0.1) - } - } - - if(general$visual.check){ - ve <- as.character(.logbk$VE_REF[1]) - savePlot(filename = file.path(general$output.path, - paste("assign_eflalo_tripnum_to_vms_",ve,"_",general$a.year,".jpeg",sep="")),type ="jpeg") - dev.off() - } - - - ## ADD A WARNING IN CASE OF LONG (UNREALISTIC) TRIPS ## - diff.date <- table.midtime$SI_ATIME - table.midtime$SI_DTIME # if at least one trip >30 days - if(attributes(diff.date)$units=="secs") idx <- which((((diff.date)/3600)/24) >30) - if(attributes(diff.date)$units=="hours") idx <- which((((diff.date)/1)/24) >30) - attributes((table.midtime$SI_ATIME - table.midtime$SI_DTIME )) - if (length( idx) >0){ - cat(paste("at least one vms trip > 30 days detected! 
check harbours...", "\n", sep="")) - suspicious <- .vms[.vms$SI_FT %in% table.midtime$SI_FT[idx] ,] - tmp <- table(suspicious$SI_LATI) - lat.suspicious <- names(tmp[tmp>5]) - if(length(lat.suspicious)!=0) cat(paste("potential harbour likely near lat ",lat.suspicious,"\n",sep="")) - tmp <- table(suspicious$SI_LONG) - long.suspicious <- names(tmp[tmp>5]) - if(length(long.suspicious)!=0) cat(paste("potential harbour likely near long ",long.suspicious,"\n",sep="")) - } # if at least one trip >30 days - rm(table.midtime) ; gc(reset=TRUE) - - - - .logbk$LE_MIDTIME <- factor(.logbk$LE_MIDTIME) - .logbk$FT_REF <- factor(.logbk$FT_REF) - .vms$SI_MIDTIME <- factor(.vms$SI_MIDTIME) - .vms$FT_REF <- factor(.vms$FT_REF) - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - #! ASSIGN A 'SI_FT' FROM VMS TRIP NUM TO #!#!#!# - #! LOGBOOK TRIPS WITH NO VMS CORRESPONDANCE #!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - dep.bk.not.in.vms <- unique( .logbk$FT_REF [ !( .logbk$FT_REF %in% .vms$FT_REF ) ] ) - if(length(dep.bk.not.in.vms)!=0){ - # bk tripnum from dep not in vms - idx <- .logbk$FT_REF %in% dep.bk.not.in.vms - bk <- .logbk[idx,] [order(.logbk[idx,]$LE_DTIME),] - if(!"SI_MIDTIME" %in% colnames(.vms)){ - vms <- .vms [order(.vms$SI_DTIME),] - SI_MIDTIME <- rep(NA, nrow(vms)) - for(r in 1: nrow(vms)){ - SI_MIDTIME[r] <- as.character(seq(from=vms$SI_DTIME[r], to=vms$SI_ATIME[r], length.out = 3)[2]) - } - vms$SI_MIDTIME <- SI_MIDTIME - } else{ vms <- .vms[order(.vms$SI_MIDTIME),]} - #1- compare bk$mid.time and vms$mid.time - # find the nearest vms$mid.time for each bk$mid.time - # and then change levels - # (so for each mid.time in bk, a tripnum will be find) - # (so no lines in bk without a tripnum...) 
- fa1 <- levels(factor(bk$LE_MIDTIME)) - new.levels <- fa1 - fa2 <- levels(factor(vms$SI_MIDTIME)) - for(i in 1:length(fa1)) { # for each level in logbk - tmp <- abs(as.numeric( as.POSIXct(fa2, tz='GMT') - as.POSIXct(fa1, tz='GMT')[i] )) - new.levels[i] <- fa2 [which(tmp == min(tmp, na.rm=TRUE) )] # find the nearest level in vms - } - bk$LE_MIDTIME <- factor(as.character(bk$LE_MIDTIME)) - levels(bk$LE_MIDTIME) <- new.levels # and change mid.time in logbk to force the merging - - # finally, replace levels by the tripnum - # (note: a same FT_REF in vms can have different mid.time - # due to the first merging of vms to logbk in the vms analysis) - tmp <- vms[vms$SI_MIDTIME %in% bk$LE_MIDTIME , c("FT_REF","SI_MIDTIME")] - tmp2 <- tmp[!duplicated(data.frame(tmp$FT_REF, tmp$SI_MIDTIME)),] - idx2 <- match(levels(bk$LE_MIDTIME), tmp2$SI_MIDTIME) - bk$FT_REF <- bk$LE_MIDTIME # init - levels(bk$FT_REF) <- as.character(tmp2$FT_REF) [idx2] - - # output - bk$LE_MIDTIME <- as.character(bk$LE_MIDTIME) - bk$FT_REF <- as.character(bk$FT_REF) - .logbk[idx,][order(.logbk[idx,]$LE_DTIME),] <- bk - } - - - } # end FRANCOIS - - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # ASSIGN A RECTANGLE TO EACH PING #!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - .vms <- assignPointsToSpatialGrid(xx=.vms) - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # COMPUTE EFFORT.MINS !#!#!#!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # vms - .vms <- .vms[order(.vms$SI_DATIM),] - .vms$LE_EFF_VMS <- abs(c(0, as.numeric(.vms[-nrow(.vms),"SI_DATIM"] - - .vms[-1,"SI_DATIM"], units="mins"))) - start.trip <- c(1,diff(.vms[,"SI_FT"])) - .vms[start.trip!=0, "LE_EFF_VMS"] <- 0 # just correct for the trip change points - # logbook (start/end of trip)- caution: will be repeated per ping in the merged output - #SI_DATIM <- as.POSIXct(paste(.logbk$FT_DDAT,.logbk$FT_DTIME,sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - #SI_DATIM2 <- 
as.POSIXct(paste(.logbk$FT_LDAT,.logbk$FT_LTIME,sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - #.logbk$LE_EFF <- an(difftime(SI_DATIM2,SI_DATIM,units="mins")) - - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # ASSIGN FISHING/NON-FISHING (optional)!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - if(general$detectFishing && general$speed=="segment") { - ## add a gear form tacsat from the logbook info (after the first merging) - ## because the assignement of a state is gear-specific. - ## caution here: we assume only one gear used inside a trip... - # because note that we remove 'logevent' and keep only one duplicate of tripnum - .vms$LE_GEAR <- factor(.vms$FT_REF) # init - tmp <- .logbk[,c("LE_GEAR","FT_REF")] - tmp <- tmp[!duplicated(tmp$FT_REF),] #remove logevent and keep only one duplicate of tripnum - tmp <- tmp[tmp$FT_REF %in% unique(.vms$LE_GEAR),] - idx <- match(levels(.vms$LE_GEAR), as.character(tmp$FT_REF)) - dd <- as.character(tmp$LE_GEAR) [idx] - dd <- replace(dd, is.na(dd), "UKN") # unknown because not matched if Niels - levels(.vms$LE_GEAR) <- dd - - # then do the assignement of the state - # according to a segmented regression on the speed histogram - .vms <- segmentTacsatSpeed ( - tacsat=.vms, - vessels=a.vesselid, - force.lower.bound=0.5, # to do: create an arg in the parent function instead... - gears.to.force= c('GNS','GND','GNC','GNF','GTR','GTN','GEN','GN','SDN','SSC'), # to do: create an arg in the parent function instead... 
- general=list(a.year=general$a.year, - output.path=general$output.path, - what.speed=general$what.speed, - visual.check=TRUE - ) - ) - #=> (semi)automatic detection of the fishing peak - # (put here because the LE_GEAR need to be informed) - # alternatively, - if(FALSE){ - .vms$SI_STATE <- 0 - .vms <- segmentedTacsatSpeed(.vms[.vms$VE_REF==a.vesselid,], units="year", analyse.by="LE_GEAR", - speed="instantaneous", logfit=FALSE, CI=0.95) - .vms$SI_STATE <- .vms$SI_STATE+1 # back compatibility with mergeEflalo2Pings() i.e. 1: fishing, 2: steaming - # ...but what about the passive gears then? - } - - - .vms <- .vms[, !colnames(.vms) %in% "LE_GEAR"] # remove after use to avoid future conflict. - } - # some alternatives TO DO: - #if(general$detectFishing && general$speed=="lookuptable") - # .vms <- lookupSpeedTacsat (tacsat=.vms, vessels=a.vesselid) - #if(general$detectFishing && general$speed=="bayesian") - # .vms <- bayesianFiltering (tacsat=.vms, vessels=a.vesselid) - - - - rm(er); rm(xx) ; gc(reset=TRUE) - - - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # MERGING WITH VMS PER TRIP !!!!!!!!!!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - do.merging <- function(method="FT_REF", .logbk, .vms, general){ - - # IF BY PING------------- - # find total nb of FISHING ping per tripnum from vms # used for method 1 'FT_REF' - if(method=="FT_REF"){ - # .vms$count.fping.trip <- factor(.vms$FT_REF) # init - # count.fping.trip <- table(.vms[.vms$SI_STATE==1,]$FT_REF) - # # => COUNT nb of FISHING pings per FT_REF because each weight will be repeated by ping after merging - # levels(.vms$count.fping.trip) <- count.fping.trip[levels(.vms$count.fping.trip)] # mapping - # .vms[.vms$SI_STATE==2,]$count.fping.trip <- NA - - # => COUNT nb of FISHING pings per FT_REF because each weight will be repeated by ping after merging - if(any(.vms$SI_STATE==1)){ - .vms$count.fping.trip <- factor(.vms$FT_REF) # init - countp <- countPings(~VE_REF+FT_REF, 
.vms[.vms$SI_STATE=="1",]) - rownames(countp) <- countp$FT_REF - levels(.vms$count.fping.trip) <- countp[levels(.vms$count.fping.trip),"pings"] # mapping - if(any(.vms$SI_STATE %in% 2)) .vms[.vms$SI_STATE==2,]$count.fping.trip <- NA - } else{.vms$count.fping.trip <- NA} - - - # => COUNT nb of gears per FT_REF because each ping will be repeated by gear after merging - count.gr.trip <- tapply(.logbk$LE_GEAR, .logbk$FT_REF, function(x) length(unique(x))) - .logbk$count.gr.trip <- count.gr.trip[.logbk$FT_REF] # mapping - - } - - - # find total nb of FISHING ping per trip-icessquare from vms # used for method 2 'FT_REF_SQ' - if(method=="FT_REF_SQ"){ - # .vms$count.fping.trip.sq <- factor(.vms$FT_REF_SQ) # init - # count.fping.trip.sq <- table(.vms[.vms$SI_STATE==1,]$FT_REF_SQ) # COUNT nb of FISHING pings per FT_REF_SQ - # levels(.vms$count.fping.trip.sq) <- count.fping.trip.sq[levels(.vms$count.fping.trip.sq)] # mapping - # if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq <- NA - - if(any(.vms$SI_STATE==1)){ - .vms$count.fping.trip.sq <- factor(.vms$FT_REF_SQ) # init - countp <- countPings(~VE_REF+SI_RECT+FT_REF, .vms[.vms$SI_STATE=="1",]) - rownames(countp) <- interaction(countp$FT_REF,countp$SI_RECT) - levels(.vms$count.fping.trip.sq ) <- countp[levels(.vms$count.fping.trip.sq),"pings"] # mapping - if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq <- NA - } else{.vms$count.fping.trip.sq <- NA} - - - # => COUNT nb of gears per FT_REF_SQ because each ping will be repeated by gear after merging - count.gr.trip.sq <- tapply(.logbk$LE_GEAR, .logbk$FT_REF_SQ, function(x) length(unique(x))) - .logbk$count.gr.trip.sq <- count.gr.trip.sq[.logbk$FT_REF_SQ] # mapping - } - - - # find total nb of FISHING ping per trip-icessquare-day from vms # used for method 3 'FT_REF_SQ_DAY' - if(method=="FT_REF_SQ_DAY"){ -# .vms$count.fping.trip.sq.day <- factor(.vms$FT_REF_SQ_DAY) # init -# count.fping.trip.sq.day <- 
table(.vms[.vms$SI_STATE==1,]$FT_REF_SQ_DAY) # COUNT nb of FISHING pings per FT_REF_SQ_DAY -# levels(.vms$count.fping.trip.sq.day) <- count.fping.trip.sq.day[levels(.vms$count.fping.trip.sq.day)] # mapping -# if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq.day <- NA - - - if(any(.vms$SI_STATE==1)){ - .vms$count.fping.trip.sq.day <- factor(.vms$FT_REF_SQ_DAY) # init - countp <- countPings(~VE_REF+day+SI_RECT+FT_REF, .vms[.vms$SI_STATE=="1",]) - rownames(countp) <- interaction(countp$FT_REF, countp$SI_RECT, countp$SI_DAY) - levels(.vms$count.fping.trip.sq.day) <- countp[levels(.vms$count.fping.trip.sq.day),"pings"] # mapping - if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq.day <- NA - } else{.vms$count.fping.trip.sq.day <- NA} - - - # => COUNT nb of gears per FT_REF_SQ_DAY because each ping will be repeated by gear after merging - count.gr.trip.sq.day <- tapply(.logbk$LE_GEAR, .logbk$FT_REF_SQ_DAY, function(x) length(unique(x))) - .logbk$count.gr.trip.sq.day <- count.gr.trip.sq.day[.logbk$FT_REF_SQ_DAY] # mapping} - } - - - - # do the merging between .logbk and .vms according to - # meth1: 'FT_REF' OR meth2: 'FT_REF_SQ' OR meth3: 'FT_REF_SQ_DAY' - # need to use a trick to avoid "out of memory" doing the merge() - coln.idx1 <- which(!colnames(.logbk)%in%c("VE_REF", method)) - coln1 <- colnames(.logbk)[coln.idx1] - tmp1 <- data.frame(coll= collapse.all.columns (.logbk, columns= coln.idx1 ), - VE_REF=.logbk$VE_REF, a.method= .logbk[,method] ) #.logbk - coln.idx2 <- which(!colnames(.vms)%in%c("VE_REF", method)) - coln2 <- colnames(.vms)[coln.idx2] - tmp2 <- data.frame(coll2= collapse.all.columns (.vms, columns= coln.idx2 ), - VE_REF=.vms$VE_REF, a.method= .vms[,method] ) #.vms - tmp1[,"a.method"] <- factor(tmp1[,"a.method"] ) - tmp2[,"a.method"] <- factor(tmp2[,"a.method"] ) - - merged.this.vessel <- merge(tmp1, tmp2, all.x=TRUE, all.y=TRUE, suffixes = c(".bk",".vms")) - #=> so, with all.y = TRUE, 
the vms records without corresponding logbk records are kept and NA are produced on the logbook part - #=> so, with all.x = TRUE, the logbk records without corresponding vms records are kept and NA are produced on the vms part - merged.this.vessel$coll <- replace(as.character(merged.this.vessel$coll),is.na(merged.this.vessel$coll), paste(rep("NA",length(coln1)),collapse="#")) - merged.this.vessel$coll <- factor(merged.this.vessel$coll) - #=> adapt 'coll' to get a vector of NA (NA in case of 'in vms but not in logbook') - merged.this.vessel$coll2 <- replace(as.character(merged.this.vessel$coll2),is.na(merged.this.vessel$coll2), paste(rep("NA",length(coln2)),collapse="#")) - # adapt 'coll2' to get a vector of NA (NA in case of 'in logbook but not in vms') - merged.this.vessel$coll2 <- factor(merged.this.vessel$coll2) - colnames(merged.this.vessel)[colnames(merged.this.vessel)%in%"a.method"] <- method - - tmp3 <- uncollapse.column(merged.this.vessel, column="coll") # logbk - tmp4 <- uncollapse.column(merged.this.vessel, column="coll2") # vms - tmp5 <- cbind.data.frame(merged.this.vessel[,c("VE_REF", method)], tmp3, tmp4) - colnames(tmp5) <- c("VE_REF", method, coln1, coln2) - merged.this.vessel <- tmp5 - - # we can choose to correct to keep the land. weight: - # the loss in weight will come from the matching records having catches but - # without fishing pings (i.e. only steaming pings)! - if(is.null(general$conserve.all)) general$conserve.all <- FALSE - if(general$conserve.all){ - # do the conservation of landings anyway? - # detect possible weight landed while no feffort detected from vms - # find FT_REF with some NA - vv<- anf(unique(merged.this.vessel[merged.this.vessel$count.fping.trip=="NA","FT_REF"])) - # then, find FT_REF with at least one no NA - no.vv<- anf(unique(merged.this.vessel[merged.this.vessel$count.fping.trip!="NA","FT_REF"])) - tripnum.all.na.inside <- vv[!vv%in%no.vv] # trip num without at least one count.fping! 
- # so, deduce loss in weight - zz<- merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside,] - - if(method=="FT_REF"){ - # in this case, reallocate evenly between all pings (caution: including steaming pings) - merged.this.vessel[,"count.fping.trip"] <- anf(merged.this.vessel[,"count.fping.trip"]) - merged.this.vessel$FT_REF <- factor( merged.this.vessel$FT_REF) - nbpings.per.trip <- unlist(lapply(split(merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside,], - merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside,]$FT_REF),nrow)) - merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside, "count.fping.trip"] <- rep(nbpings.per.trip,nbpings.per.trip ) - merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside, "flag"] <- 5 - } - } # end conserve.all - - - - - # apply the catches re-distribution - # method 1, 2 and 3: per ping - # PER PING: - # ASSUMING EQUAL ALLOCATION BETWEEN FISHING PINGS AND GEARS USE INSIDE A SAME TRIP - nm <- names(merged.this.vessel) - idx.col.w <- grep('KG', nm) # index columns with species weight - idx.col.v <- grep('EURO', nm) # index columns with species value - idx.col <- c(idx.col.w, idx.col.v) - if(method=="FT_REF_SQ_DAY"){ - merged.this.vessel[,idx.col] <- (apply(merged.this.vessel[,idx.col],2,anf) / - anf(merged.this.vessel$count.fping.trip.sq.day)) / - anf(merged.this.vessel$count.gr.trip.sq.day) - } - if(method=="FT_REF_SQ"){ - merged.this.vessel[,idx.col] <- (apply(merged.this.vessel[,idx.col],2,anf) / - anf(merged.this.vessel$count.fping.trip.sq)) / - anf(merged.this.vessel$count.gr.trip.sq) - } - if(method=="FT_REF"){ - merged.this.vessel[,idx.col] <- (apply(merged.this.vessel[,idx.col],2,anf) / - anf(merged.this.vessel$count.fping.trip) ) / - anf(merged.this.vessel$count.gr.trip) - } - - - - - return(merged.this.vessel) - } - - - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # SET UP PRIMARY KEYS FOR MERGING!#!#!#!#!#!#!#!# - 
#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - .logbk$FT_REF_SQ_DAY <- factor(paste(.logbk$FT_REF, ".", .logbk$LE_RECT,".", an(format(.logbk$LE_CTIME, '%j')), sep='')) - .vms$FT_REF <- factor(.vms$FT_REF) - .vms$FT_REF_SQ <- factor(paste(.vms$FT_REF, ".", .vms$SI_RECT, sep='')) - .vms$FT_REF_SQ_DAY <- factor(paste(.vms$FT_REF, ".", .vms$SI_RECT,".", an(format(.vms$SI_DATIM, '%j')), sep='')) - - - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # AGGREGATE WEIGHT PER SPECIES !#!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - nm <- names(.logbk) - idx.col.w <- grep('KG', nm) # index columns with species weight - idx.col.v <- grep('EURO', nm) # index columns with species value - idx.col <- c(idx.col.w, idx.col.v) - - DT <- data.table(.logbk) # library data.table for fast grouping replacing aggregate() - # AGGREGATE WEIGHT (OR VALUE) PER SPECIES PER FT_REF_SQ_DAY (NOTE: SO, 'LE_SEQNUM' IS AGGREGATED HERE) - eq1 <- c.listquote(paste("sum(",nm[idx.col],",na.rm=TRUE)",sep="")) - .logbk <- DT[,eval(eq1),by=list(FT_REF_SQ_DAY,VE_REF,VE_FLT,VE_KW,LE_MET_level6,LE_GEAR)] - .logbk <- data.frame(.logbk) - colnames(.logbk) <- c("FT_REF_SQ_DAY","VE_REF","VE_FLT","VE_KW","LE_MET_level6","LE_GEAR",nm[idx.col]) - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # MERGING PROCEDURE CHOICE !#!#!#!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - - my.split <- function(obj,a.sep="\\.",idx=1) unlist(lapply(strsplit(obj, a.sep),function(x)x[idx])) - # reduce the level - .logbk$FT_REF_SQ <- factor(paste(my.split(as.character(.logbk$FT_REF_SQ_DAY),a.sep="\\.",idx=1), - my.split(as.character(.logbk$FT_REF_SQ_DAY),a.sep="\\.",idx=2),sep='.')) - # reduce the level - .logbk$FT_REF <- factor(my.split(as.character(.logbk$FT_REF_SQ),a.sep="\\.",idx=1)) - - # find common keys - tripnum.sq.day.in.vms.and.in.bk <- .vms$FT_REF_SQ_DAY [.vms$FT_REF_SQ_DAY %in% .logbk$FT_REF_SQ_DAY] - tripnum.sq.in.vms.and.in.bk <- .vms$FT_REF_SQ [.vms$FT_REF_SQ 
%in% .logbk$FT_REF_SQ] - .vms.in.bk <- .vms[ .vms$FT_REF_SQ_DAY %in% tripnum.sq.day.in.vms.and.in.bk,] - .vms.in.bk2 <- .vms[ !(.vms$FT_REF_SQ_DAY %in% tripnum.sq.day.in.vms.and.in.bk) & - .vms$FT_REF_SQ %in% tripnum.sq.in.vms.and.in.bk,] - in.bk.and.feffort.not.at.0 <- unique(.vms.in.bk[.vms.in.bk$SI_STATE==1,]$FT_REF_SQ_DAY) - in.bk2.and.feffort.not.at.0 <- unique(.vms.in.bk2[.vms.in.bk2$SI_STATE==1,]$FT_REF_SQ) - - # split .vms and .logbk in three blocks - # vms with good match => go to meth3 - .vms.for.meth3 <- .vms [.vms$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0, ] - # vms with intermediate match => go to meth2 - .vms.for.meth2 <- .vms [!(.vms$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & - (.vms$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] - # vms with bad match => go to meth1 - .vms.for.meth1 <- .vms [!(.vms$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & - !(.vms$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] - # logbk with good match => go to meth3 - .logbk.for.meth3 <- .logbk [.logbk$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0, ] - # logbk with intermediate match => go to meth2 - .logbk.for.meth2 <- .logbk [!(.logbk$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & - (.logbk$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] - # logbk with bad match => go to meth1 - .logbk.for.meth1 <- .logbk [!(.logbk$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & - !(.logbk$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] - - suppressWarnings(rm(merged1, merged2, merged3)) # clear - #!! 
METH1 !!# - if(nrow(.logbk.for.meth1)!=0 && nrow(.vms.for.meth1)!=0 ) { - # remove useless cols and aggregate according to the key 'FT_REF' - .logbk.for.meth1 <- .logbk.for.meth1[, !colnames(.logbk.for.meth1)%in% c("FT_REF_SQ_DAY","FT_REF_SQ")] - nm <- names(.logbk.for.meth1) - idx.col.w <- grep('KG', nm) # index columns with species weight - idx.col.v <- grep('EURO', nm) # index columns with species value - idx.col <- c(idx.col.w, idx.col.v) - # AGGREGATE WEIGHT (OR VALUE) PER SPECIES PER FT_REF - DT <- data.table(.logbk.for.meth1) # library data.table for fast grouping replacing aggregate() - eq1 <- c.listquote(paste("sum(", nm[idx.col],",na.rm=TRUE)",sep="")) - .logbk.for.meth1 <- DT[,eval(eq1),by=list(VE_REF,FT_REF,VE_FLT,VE_KW,LE_MET_level6,LE_GEAR)] - .logbk.for.meth1 <- data.frame(.logbk.for.meth1) - colnames(.logbk.for.meth1) <- c("VE_REF","FT_REF","VE_FLT","VE_KW","LE_MET_level6","LE_GEAR",nm[idx.col]) - # do.merging - merged1 <- do.merging(method="FT_REF", .logbk.for.meth1, .vms.for.meth1, general) - # add meth flag - if("flag" %in% names(merged1) && nrow(merged1[is.na(merged1[,"flag"]),])!=0){ - merged1[is.na(merged1[,"flag"]),"flag"] <- 1 # meth 1 - } else merged1$flag <- 1 - } - #!! 
METH2 !!# - if(nrow(.logbk.for.meth2)!=0 && nrow(.vms.for.meth2)!=0 ) { - # remove useless cols and aggregate according to the key 'FT_REF_SQ' - .logbk.for.meth2 <- .logbk.for.meth2[, !colnames(.logbk.for.meth2)%in% c("FT_REF_SQ_DAY","FT_REF")] - nm <- names(.logbk.for.meth2) - idx.col.w <- grep('KG', nm) # index columns with species weight - idx.col.v <- grep('EURO', nm) # index columns with species value - idx.col <- c(idx.col.w, idx.col.v) - # AGGREGATE WEIGHT (OR VALUE) PER SPECIES PER FT_REF_SQ - DT <- data.table( .logbk.for.meth2) # library data.table for fast grouping replacing aggregate() - eq2 <- c.listquote(paste("sum(",nm[idx.col],",na.rm=TRUE)",sep="")) - .logbk.for.meth2 <- DT[,eval(eq2),by=list(VE_REF,FT_REF_SQ,VE_FLT,VE_KW,LE_MET_level6,LE_GEAR)] - .logbk.for.meth2 <- data.frame(.logbk.for.meth2) - colnames(.logbk.for.meth2) <- c("VE_REF","FT_REF_SQ","VE_FLT","VE_KW","LE_MET_level6","LE_GEAR",nm[idx.col]) - # do.merging - merged2 <- do.merging(method="FT_REF_SQ", .logbk.for.meth2, .vms.for.meth2, general) - # add meth flag - merged2$flag <- 2 # meth 2 - } - #!! METH3 !!# - if(nrow(.logbk.for.meth3)!=0 && nrow(.vms.for.meth3)!=0 ) { - # do.merging - merged3 <- do.merging(method="FT_REF_SQ_DAY", .logbk.for.meth3, .vms.for.meth3, general) - # add meth flag - merged3$flag <- 3 # meth 3 - } - - # bind the three blocks - merged <- NULL ; colnm <- NULL - for(i in 1: 3){ - a.table <- try(get(paste('merged',i,sep='')), silent=TRUE) - if(class(a.table)!="try-error"){ - a.table <- a.table[, !colnames(a.table) %in% - c("count.fping.trip.sq.day","count.fping.trip.sq","count.fping.trip", - "tot.fish.effort.trip","tot.fish.effort.trip.sq", - "count.gr.trip", "count.gr.trip.sq", "count.gr.trip.sq.day", - "FT_REF_SQ", "FT_REF_SQ_DAY")] # remove tool columns - if(i==1) colnm <- colnames(a.table) ; if(is.null(colnm)) colnm <- colnames(a.table) - merged <- rbind.data.frame (merged, a.table[, colnm]) - } - } - - # if still 'not merging' part, retrieve on NA side i.e. 
occurs when pings in vms but not in bk - merged <- retrieveOnBkSide(merged, type.data=c( "VE_FLT","VE_KW","LE_GEAR", "LE_MET_level6")) # i.e. when metier=='NA' - - - # clean up - rm(a.table, merged1, merged2, merged3, merged.this.vessel,.vms, .logbk, logbk.this.vessel, vms.this.vessel) - gc(reset=TRUE) - - # restore tacsat names "%e/%m/%Y %H:%M" - idx <- merged$SI_DATIM!='NA' # NA is possible when bk not in vms because bk.tripnum vms may belong to another block than block1 - merged$SI_DATIM <- as.character(merged$SI_DATIM) - merged$SI_DATE <- NA - merged[idx,"SI_DATE"] <- paste(substr(merged[idx,]$SI_DATIM ,9,10),"/", - substr(merged[idx,]$SI_DATIM , 6,7), "/", substr(merged[idx,]$SI_DATIM ,1,4), sep='') - merged$SI_TIME <- NA - merged[idx,"SI_TIME"] <- paste(substr(merged[idx,]$SI_DATIM , 12,13),":", - substr(merged[idx,]$SI_DATIM , 15,16), sep='') - - # last calculation - merged$KW_HOURS <- anf(merged$VE_KW) * anf(merged$LE_EFF_VMS) /60 - - # order chronologically - merged <- orderBy(~SI_DATIM, merged) - - # last clean up - merged <- merged[, !colnames(merged) %in% c('idx', 'icessquare', "SI_DATIM", "SI_MIDTIME")] - - # save------------ - save("merged", file=file.path(general$output.path, - paste("merged_", a.vesselid,"_",general$a.year,".RData", sep=''))) - cat(paste("save 'merged'...OK\n\n",sep="")) - - - }else{ # end 'a.flag' - cat(paste("failure for",a.vesselid,"(probably not vms-equipped)\n")) - # because no vms for this vessel... - # TO DO: the logbk way - #... - } - }else{ # end try-error - cat(paste("failure for",a.vesselid,"(probably not vms-equipped)\n")) - # because no vms for this vessel... - # TO DO: the logbk way - #... 
- } - - - - - } # end a.vesselid - - - - - - -return() -} - - - - - - - ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - ##!!!!!MAIN!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - if(FALSE) { - - - #\dontrun{ - data(eflalo) - data(tacsat) - data(euharbours); euharbours <- harbours - - # format - eflalo <- formatEflalo(eflalo) - tacsat <- formatTacsat(tacsat) - - # order tacsat chronologically with library(doBy) - tacsat <- sortTacsat(tacsat) - - # test each ping if in harbour or not - tacsat$SI_HARB <- NA - euharbours$Description <- euharbours$harbour - tacsat$SI_HARB <- pointInHarbour(lon=anf(tacsat$SI_LONG), - lat=anf(tacsat$SI_LATI), - harbours=euharbours, - rowSize=30, returnNames=TRUE) - inHarb <- tacsat$SI_HARB - inHarb <- replace(inHarb, !is.na(inHarb), 1) - inHarb <- replace(inHarb, is.na(inHarb), 0) - inHarb <- as.numeric(inHarb) - - # assign a trip identifier - tacsat$SI_FT <- 1 # init - idx <- which(inHarb==0) - tacsat[idx,"SI_FT"] <- cumsum(inHarb) [idx] # add a SI_FT index - - # keep 'out of harbour' points only - # (but keep the departure point and the arrival point lying in the harbour) - startTrip <- c(diff(tacsat[,"SI_FT"]), 0) - endTrip <- c(0, diff(tacsat[,"SI_FT"])) - tacsat[which(startTrip>0),"SI_FT"] <- tacsat[which(startTrip>0)+1,"SI_FT"] - tacsat[which(endTrip<0),"SI_FT"] <- tacsat[which(endTrip<0)-1,"SI_FT"] - tacsat <- tacsat[which(inHarb==0 | startTrip>0 | endTrip<0),] - - - # assign a state to each ping (here, useless if detectFishing at TRUE) - tacsat$SI_STATE <- 2 # init (1: fishing; 2: steaming) - # fake speed rule for fishing state - tacsat$SI_STATE [(tacsat$SI_SP>4 & tacsat$SI_SP<8)] <-1 - - - # reduce the size of the eflalo data by merging species - # (assuming that the other species is coded MZZ), 
threshold in euros. - eflalo2 <- poolEflaloSpecies (eflalo, threshold=1e6, code="MZZ") - - # debug if eflalo has not been cleaned earlier - eflalo <- eflalo[!eflalo$VE_REF=="NA" &!is.na(eflalo$VE_REF),] - - # an informed VE_FLT is also required - if(all(is.na(eflalo$VE_FLT))) eflalo$VE_FLT <- "fleet1" - - # possible mis-naming mistakes - if(!match('LE_MET_level6',colnames(eflalo))>0){ - eflalo$LE_MET_level6 <- eflalo$LE_MET - } - - # debug - eflalo <- eflalo[eflalo$LE_MET!="No_logbook6",] - - - # TEST FOR A GIVEN SET OF VESSELS - # (if detect.fishing is true then do also detection of fishing activity - # e.g. if speed='segment' the segmentTacsatSpeed() automatic detection of fishing states - # that will overwrite the existing SI_STATE) - mergeEflalo2Pings (eflalo=eflalo, tacsat=tacsat, vessels=c("738", "804"), - general=list(output.path=file.path("C:","output"), - visual.check=TRUE, detectFishing=TRUE, speed="segment", - what.speed="calculated")) - # ...OR APPLY FOR ALL VESSELS IN eflalo - mergeEflalo2Pings (eflalo=eflalo, tacsat=tacsat, - general=list(output.path=file.path("C:","output"), - visual.check=TRUE, detectFishing=TRUE, speed="segment", - what.speed="calculated")) - gc(reset=TRUE) - - # load the merged output table for one vessel - load(file.path("C:","output","merged_804_1800.RData")) - - # check the conservation of landings - sum(tapply(anf(merged$LE_KG_PLE), merged$flag, sum, na.rm=TRUE)) - sum(eflalo[eflalo$VE_REF=="804","LE_KG_PLE"], na.rm=TRUE) - - - # ...or bind all vessels (keeping only some given species here) - bindAllMergedTables (vessels=c("738", "804"), a.year = "1800", - species.to.keep=c("PLE","COD"), - folder = file.path("C:","output"), - all.in.one.table=TRUE) - - # ...and load the merged output table for all vessels - load(file.path("C:","output","all_merged__1800.RData")) - - # map landing of cod from all studied vessels - # ( with debugging if tacsat has not been cleaned earlier) - graphics.off() - df1<- all.merged[, 
c("SI_LATI","SI_LONG","LE_KG_COD")] - df1$SI_LONG <- anf(df1$SI_LONG) - df1$SI_LATI <- anf(df1$SI_LATI) - df1 <- df1[ !is.na(df1$SI_LATI),] - df1 <- df1[ !is.na(df1$SI_LONG),] - vmsGridCreate(df1,nameLon="SI_LONG", nameLat="SI_LATI", - nameVarToSum = "LE_KG_COD", cellsizeX =0.1, - cellsizeY =0.05, legendtitle = "COD landings (kg)") - - # but you need to remove steaming points before gridding! - df2<-df1[-which(is.na(df1$LE_KG_COD)),] - vmsGridCreate(df2,nameLon="SI_LONG",nameLat="SI_LATI", we = 3, ea = 6, so = 50, no = 54, - nameVarToSum = "LE_KG_COD",cellsizeX =0.1, - cellsizeY =0.05, legendtitle = "COD landings (kg)", plotPoints =TRUE, - breaks0=c(1,2,4,8,16,32,64,100000)) - - - - # CONVERT TO FISHFRAME FORMAT (might take some time running) - # (by default, this will keep all the species in the output table) - tmp <- bindAllMergedTables (vessels= unique(tacsat$VE_REF), - species.to.keep=character(), - folder = file.path("C:","output"), - all.in.one.table=FALSE) - - ff <- pings2Fishframe (general=list(output.path=file.path("C:","output"), - a.year=1800, a.country="NLD", degree=0.05 ) ) - - - # TO DO.... - # Use the interpolation routine to improve the location of the effort - #all.merged$SI_SP <- as.numeric(as.character( all.merged$SI_SP)) - #all.merged$SI_HE <- as.numeric(as.character( all.merged$SI_HE)) - #all.merged$SI_LONG <-as.numeric(as.character(all.merged$SI_LONG)) - #all.merged$SI_LATI <-as.numeric(as.character(all.merged$SI_LATI)) - #interpolations <- interpolateTacsat( all.merged [,c("VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE")] - # ,interval=120 - # ,margin=12 - # ,res=100 - # ,method="cHs" - # ,params=list(fm=0.5,distscale=20,sigline=0.2,st=c(2,6)) - # ,headingAdjustment=0 - # ) - #interpolationsED <- equalDistance(interpolations,res=10) - # make sure that the 'res' statement in the interpolateTacsat is significantly bigger - # than the 'res' statement in the equalDistance function. - - # then map again... 
- #vmsGridCreate(interpolationsED,nameLon="SI_LONG",nameLat="SI_LATI", - # cellsizeX =0.05, cellsizeY =0.05, legendtitle = "landings (kg)") - - - - #} - -} # end main \ No newline at end of file +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +# A 'R' ROUTINE FOR THE COUPLING OF VMS AND LOGBOOKS +# WP4 - Lot2 EU tender VMS/LOGBOOKS COUPLING +# author: Francois Bastardie (DTU- Aqua; fba@aqua.dtu.dk) +# January 2010 +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# + + + + + + +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## 
+##!!!!!!!!!!!!!!!!!!!!!!MERGE LOGBOOKS WITH VMS PER VESSEL!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + + +#!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!# + + +#' Coupling of VMS and logbooks data including landings dispatching between +#' fishing pings. +#' +#' The first step of the merging routine consisted of assigning a common +#' identifier joining each VMS trip to a logbook trip by searching for the +#' nearest trip among all logbook trips based on the temporal mid-point of +#' trips. This circumvents problems where the start and end time of VMS and +#' logbook trips do not match exactly and ensure that each logbook trip will be +#' merged with a given VMS trip even if the reporting of the start and end date +#' by fishermen is uncertain. We observed in the data that this reporting date +#' may fluctuate with +/- 1 day around the VMS trip dates possibly because of +#' error in reporting. Additionally, the possible few remaining logbook trips +#' were also linked with the nearest VMS trips. This latter event may occur +#' from unknown failure in the VMS device while fishermen were still fishing. +#' In the processing, links were then forced to nearest VMS trips to prevent +#' loss of any reported landings. +#' +#' After this fist merging, the distinction of fishnig and not-fishing states +#' can be performed if the 'detectFishing' argument is at TRUE. 
The current +#' default method ('segment') will run an automatic detection of the fishing +#' peak given the gear type in use. After having detected the vessel-specific +#' speed lower and upper boundaries, one state (i.e. in SI_STATE) is assigned +#' to each ping. +#' +#' The second step of the process evaluated the consistency between both types +#' of data sources. Sequentially, a quality flag (i to iv) was issued for each +#' recorded position depending on the various degrees of matching, from finer +#' to coarser, if both data have in common: (i) the trip identifier, the same +#' area (ICES rectangle), and the catch date (ii) only the trip identifier and +#' the area, or (iii) only the trip identifier. (the flag iv occur when not +#' fishing points is remaining while some landings are still to be allocated in +#' these very few cases, all steaming points are actually converted to fishing +#' points). The initial datasets (both VMS and logbooks) were split into +#' sub-blocks according to this flag. Each pair of sub-blocks was then joined +#' separately and all merged sub-blocks were bound in one dataset afterwards. +#' +#' In a final step, declared landings (in weight and value) at the ICES +#' rectangle scale from logbooks were further allocated at a lower spatial and +#' time scale level with regard to the VMS information. This was done by +#' allocating a proportion of the landings to each detected fishing position +#' depending on the level of matching between VMS and logbook data as described +#' in step 2. In the case of full matching, landings allocated to a given +#' position were proportional to the number of detected fishing positions lying +#' inside each declared logbook area for a given date, assuming that the total +#' landings declared in this area at this date are divided equally among all of +#' these positions. 
In the particular case of the use of several gear types +#' within the same rectangle and also for the same catch date, an equal share +#' of landings between gear types was also assumed for this day. In case of +#' partial matching, the following procedure was used. First, when the match +#' was correct by area but not by catch date, the landings were equally +#' dispatched between all the fishing positions of this trip detected in this +#' particular ICES rectangle. Second, when the match failed on both catch date +#' and area, the landings concerned were equally allocated to all the detected +#' fishing positions of the trip for which no previous match has been detected. +#' Finally, residual landings (kept in the final output) occurred when the +#' match failed both for the catch date and area, and no fishing position +#' remained. +#' +#' Warning: However, one should take care that the generated maps related to +#' the origin of landings do not entirely reflect the spatial distributions of +#' the harvested populations but more accurately should be interpreted as the +#' conjunction between the stocks and the fishermen effort distribution. The +#' distribution of the landings on discrete positions should also be understood +#' with some cautions ( albeit the number of pings may be high enough to be +#' considered representative on a large part of the total exploited areas) and +#' possible allocation of landings on locations we know that a stock is not +#' present might potentially occur. +#' +#' in tacsat SI_STAT, fishing should be coded 1 and non-fishing should be coded +#' 2. The method is very sensitive to missing harbours. Some warning messages +#' are then provided to help the user to detect some repeated positions that +#' are likely to be missing harbours. In this last case you should revise the +#' harbour list before processing again. 
+#' +#' @param eflalo data.frame, eflalo format +#' @param tacsat data.frame, tacsat format +#' @param general list, general settings to go through all sub-functions e.g. +#' detectFishing +#' @param vessels (optional) name of the vessel(s) to process e.g. those in +#' tacsat +#' @return Nothing is returned but a merged data.frame per vessel in the output +#' folder. These data.frame could be later bound into a big one using +#' bindAllMergedTable() +#' @author Francois Bastardie +#' @seealso \code{\link{vmsGridCreate}} +#' @references Bastardie et al. 2010. Fisheries Research +#' @examples +#' +#' +#' \dontrun{ +#' data(eflalo) +#' data(tacsat) +#' data(euharbours) +#' +#' # format +#' eflalo <- formatEflalo(eflalo) +#' tacsat <- formatTacsat(tacsat) +#' +#' # order tacsat chronologically with library(doBy) +#' tacsat <- sortTacsat(tacsat) +#' +#' # test each ping if in harbour or not +#' tacsat$SI_HARB <- NA +#' euharbours$Description <- euharbours$harbour +#' tacsat$SI_HARB <- pointInHarbour(lon=anf(tacsat$SI_LONG), +#' lat=anf(tacsat$SI_LATI), +#' harbours=euharbours, +#' rowSize=30, returnNames=TRUE) +#' inHarb <- tacsat$SI_HARB +#' inHarb <- replace(inHarb, !is.na(inHarb), 1) +#' inHarb <- replace(inHarb, is.na(inHarb), 0) +#' inHarb <- as.numeric(inHarb) +#' +#' # assign a trip identifier +#' tacsat$SI_FT <- 1 # init +#' idx <- which(inHarb==0) +#' tacsat[idx,"SI_FT"] <- cumsum(inHarb) [idx] # add a SI_FT index +#' +#' # keep 'out of harbour' points only +#' # (but keep the departure point and the arrival point lying in the harbour) +#' startTrip <- c(diff(tacsat[,"SI_FT"]), 0) +#' endTrip <- c(0, diff(tacsat[,"SI_FT"])) +#' tacsat[which(startTrip>0),"SI_FT"] <- tacsat[which(startTrip>0)+1,"SI_FT"] +#' tacsat[which(endTrip<0),"SI_FT"] <- tacsat[which(endTrip<0)-1,"SI_FT"] +#' tacsat <- tacsat[which(inHarb==0 | startTrip>0 | endTrip<0),] +#' +#' +#' # assign a state to each ping (here, useless if detectFishing at TRUE) +#' tacsat$SI_STATE <- 2 # init (1: 
fishing; 2: steaming) +#' # fake speed rule for fishing state +#' tacsat$SI_STATE [(tacsat$SI_SP>4 & tacsat$SI_SP<8)] <-1 +#' +#' +#' # reduce the size of the eflalo data by merging species +#' # (assuming that the other species is coded MZZ), threshold in euros. +#' eflalo2 <- poolEflaloSpecies (eflalo, threshold=1e6, code="MZZ") +#' +#' # debug if eflalo has not been cleaned earlier +#' eflalo <- eflalo[!eflalo$VE_REF=="NA" &!is.na(eflalo$VE_REF),] +#' +#' # an informed VE_FLT is also required +#' if(all(is.na(eflalo$VE_FLT))) eflalo$VE_FLT <- "fleet1" +#' +#' # possible mis-naming mistakes +#' if(!match('LE_MET_level6',colnames(eflalo))>0){ +#' eflalo$LE_MET_level6 <- eflalo$LE_MET +#' } +#' +#' # debug +#' eflalo <- eflalo[eflalo$LE_MET!="No_logbook6",] +#' +#' +#' # TEST FOR A GIVEN SET OF VESSELS +#' # (if detect.fishing is true then do also detection of fishing activity +#' # e.g. if speed='segment' the segmentTacsatSpeed() automatic detection of fishing states +#' # that will overwrite the existing SI_STATE) +#' mergeEflalo2Pings (eflalo=eflalo, tacsat=tacsat, vessels=c("738", "804"), +#' general=list(output.path=file.path("C:","output"), +#' visual.check=TRUE, detectFishing=TRUE, speed="segment", +#' what.speed="calculated")) +#' # ...OR APPLY FOR ALL VESSELS IN eflalo +#' mergeEflalo2Pings (eflalo=eflalo, tacsat=tacsat, +#' general=list(output.path=file.path("C:","output"), +#' visual.check=TRUE, detectFishing=TRUE, speed="segment", +#' what.speed="calculated")) +#' gc(reset=TRUE) +#' +#' # load the merged output table for one vessel +#' load(file.path("C:","output","merged_804_1800.RData")) +#' +#' # check the conservation of landings +#' sum(tapply(anf(merged$LE_KG_PLE), merged$flag, sum, na.rm=TRUE)) +#' sum(eflalo[eflalo$VE_REF=="804","LE_KG_PLE"], na.rm=TRUE) +#' +#' +#' # ...or bind all vessels (keeping only some given species here) +#' bindAllMergedTables (vessels=c("738", "804"), a.year = "1800", +#' species.to.keep=c("PLE","COD"), +#' folder = 
file.path("C:","output"), +#' all.in.one.table=TRUE) +#' +#' # ...and load the merged output table for all vessels +#' load(file.path("C:","output","all_merged__1800.RData")) +#' +#' # map landing of cod from all studied vessels +#' # ( with debugging if tacsat has not been cleaned earlier) +#' graphics.off() +#' df1<- all.merged[, c("SI_LATI","SI_LONG","LE_KG_COD")] +#' df1$SI_LONG <- anf(df1$SI_LONG) +#' df1$SI_LATI <- anf(df1$SI_LATI) +#' df1 <- df1[ !is.na(df1$SI_LATI),] +#' df1 <- df1[ !is.na(df1$SI_LONG),] +#' vmsGridCreate(df1,nameLon="SI_LONG", nameLat="SI_LATI", +#' nameVarToSum = "LE_KG_COD", cellsizeX =0.1, +#' cellsizeY =0.05, legendtitle = "COD landings (kg)") +#' +#' # but you need to remove steaming points before gridding! +#' df2<-df1[-which(is.na(df1$LE_KG_COD)),] +#' vmsGridCreate(df2,nameLon="SI_LONG",nameLat="SI_LATI", we = 3, ea = 6, so = 50, no = 54, +#' nameVarToSum = "LE_KG_COD",cellsizeX =0.1, +#' cellsizeY =0.05, legendtitle = "COD landings (kg)", plotPoints =TRUE, +#' breaks0=c(1,2,4,8,16,32,64,100000)) +#' +#' +#' +#' # CONVERT TO FISHFRAME FORMAT (might take some time running) +#' # (by default, this will keep all the species in the output table) +#' tmp <- bindAllMergedTables (vessels= unique(tacsat$VE_REF), +#' species.to.keep=character(), +#' folder = file.path("C:","output"), +#' all.in.one.table=FALSE) +#' +#' ff <- pings2Fishframe (general=list(output.path=file.path("C:","output"), +#' a.year=1800, a.country="NLD", degree=0.05 ) ) +#' +#' } +#' +#' +#' @export mergeEflalo2Pings +mergeEflalo2Pings <- + function(eflalo, tacsat, vessels=unique(eflalo$VE_REF), general=list(output.path=file.path("C:"), + visual.check=TRUE, detectFishing=FALSE, speed="segment", what.speed="calculated", conserve.all=TRUE, + ), ...){ + + lstargs <- as.list( sys.call() ) # equivalent to lstargs <- list(...) but suppress the r cmd build warning? 
+ + # create required folders for outputs + cat("if it still doesn't exist, 'results' folder is created in ",general$output.path,"\n") + dir.create(general$output.path, showWarnings = TRUE, recursive = TRUE, mode = "0777") + + + #!!!!!!!!!!!!!!!!!!!!!# + #!!!!!!!!!!!!!!!!!!!!!# + # utils-- + collapse.all.columns <- function (obj, columns= seq(ncol(obj)) ){ + eval(parse(text=paste('paste(obj[,', paste(columns,collapse='] ,"#", obj[,'), '],sep=\'\')', sep=''))) } + uncollapse.column <- function(obj, column="coll"){ + dd<- strsplit(as.character(obj[,column]),"#") ; nco <- length(dd[[1]]) ; dd<- unlist(dd) + res <- eval(parse(text=paste('data.frame(',paste('dd[seq(',1:nco,',nrow(obj)*nco,by=nco)]', collapse=','),')'))) + colnames(res) <- paste("col",1:nco,sep='') + return(res) + } + + #utils-- + # FUNCTION TO CREATE A SPATIAL GRID + # 'xx' have a 'SI_LATI' and a 'SI_LONG' columns + assignPointsToSpatialGrid <- function(xx){ + + xx <- xx[,!colnames(xx) %in% c("icessquare","icessquare.vms") ] # remove + xx <- cbind.data.frame(xx, icessquare= rep(0,nrow(xx))) + + + rlong <- range(anf(xx$SI_LONG),na.rm=TRUE) + vect.long <- signif(seq(floor(rlong[1]), ceiling(rlong[2]), by=1),4) # long (x) + label.long <- rep(paste(rep(LETTERS,each=10),0:9,sep=""),each=1) + names(label.long) <- signif(seq(-50, 209, by=1),4) # long (x) + label.long <- label.long[!is.na(names(label.long))] # => correspondance long (-50 to 209) / sq letter (A0 to Z9) + label.long <- label.long[as.character(vect.long)] + rlat <- range(anf(xx$SI_LATI), na.rm=TRUE) + vect.lat <- signif(seq(floor(rlat[1]), ceiling(rlat[2]),by=0.5),4) # lat (y) + label.lat <- rep(paste(seq(1,75,1)),each=1) + names(label.lat) <- paste(signif(seq(36,73, by=0.5),4)) + label.lat <- label.lat[!is.na(names(label.lat))] # => correspondance lat (36 to 73) / sq number (1 to 75) + label.lat <- label.lat[as.character(vect.lat)] + vect.label <- paste(rep(label.lat,each=length(label.long)),"",label.long,sep="") + xx[,"SI_RECT"] <- 
paste(label.lat [findInterval(anf(xx[,"SI_LATI"]), vect.lat)] , label.long [findInterval(anf(xx[,"SI_LONG"]), vect.long)], sep="") + + return(xx) + } + + + #!!!!!!!!!!!!!!!!!!!!!# + #utils-- + # for managing NA on logbook side + # (from vms trip.sq without corresponding logbook trip.sq e.g. because no declaration in sq because only steaming time inside) + # we need to inform back the specificity of the vessel from logbook using info from the same trip i.e. vesselid+FT_REF + retrieveOnBkSide <- function(merged, type.data){ + idx <- which(merged$LE_MET_level6=="NA") + merged.NA <- merged[idx,] # input (only the trip.sq with NA for the logbook part) + + for (td in type.data){ + map <- tapply(merged[, td ], paste(merged$VE_REF, merged$FT_REF), + function(i) {ss<- unique(as.character(i)) ; ss[ss!="NA"][1]}) + merged.NA[, td ] <- factor(paste(merged.NA$VE_REF,merged.NA$FT_REF)) + levels(merged.NA[, td ]) <- map[levels(merged.NA[, td ])] + } + if(nrow(merged.NA)>0) merged.NA$flag <- 4 # flag on meth + merged[idx,] <- merged.NA # output + return(merged) + } + + #!#!##!#!##!#!##!#!##!#!##!#!# + #!#!##!#!##!#!##!#!##!#!##!#!# + #!#!##!#!##!#!##!#!##!#!##!#!# + #!#!##!#!##!#!##!#!##!#!##!#!# + #!#!##!#!##!#!##!#!##!#!##!#!# + all.vesselid <- as.character(unique(eflalo[anf(eflalo$VE_LEN)>=0,]$VE_REF)) + all.vesselid <- all.vesselid[!is.na(all.vesselid)] # e.g. when VE_LEN at NA exists + if(length(vessels)!=0) all.vesselid <- vessels + # => IF ARG INFORMED, THEN KEEP ONLY ONE OR SEVERAL VESSELS AS NEEDED.... 
+ + for(a.vesselid in all.vesselid){ # PER VESSEL + cat(paste(a.vesselid,"\n", sep="" )) + + #---------- + #---------- + #---------- + #---------- + #---------- + #---------- + # LOGBOOK INPUT + logbk.this.vessel <- eflalo[eflalo$VE_REF %in% a.vesselid,] + logbk.this.vessel$LE_RECT <- factor(logbk.this.vessel$LE_RECT) + logbk.this.vessel$VE_REF <- factor(logbk.this.vessel$VE_REF) + logbk.this.vessel$VE_FLT <- factor(logbk.this.vessel$VE_FLT) + + + # automatic detection of a.year + general$a.year <- format(strptime( paste(logbk.this.vessel$FT_DDAT[1]) , tz='GMT', "%e/%m/%Y" ), "%Y") + + # departure time + logbk.this.vessel$LE_DTIME <- as.POSIXct( paste(logbk.this.vessel$FT_DDAT, logbk.this.vessel$FT_DTIME) , + tz='GMT', "%e/%m/%Y %H:%M" ) + # arrival time + logbk.this.vessel$LE_LTIME <- as.POSIXct( paste(logbk.this.vessel$FT_LDAT, logbk.this.vessel$FT_LTIME) , + tz='GMT', "%e/%m/%Y %H:%M" ) + # catch.date + logbk.this.vessel$LE_CTIME <- as.POSIXct( paste(logbk.this.vessel$LE_CDAT) , tz='GMT', "%e/%m/%Y" ) + + # mid time bk trips + LE_MIDTIME <- rep(NA, nrow(logbk.this.vessel)) + dep <- logbk.this.vessel$LE_DTIME +10 # we artificially add +10min because bug in R if mid-time is 00:00:00 + arr <- logbk.this.vessel$LE_LTIME +1 + for(r in 1:length(dep)){ + LE_MIDTIME[r] <- as.character(seq(from=dep[r], to=arr[r], length.out = 3)[2]) + } + logbk.this.vessel$LE_MIDTIME <- LE_MIDTIME + + if(!"FT_REF" %in% colnames(logbk.this.vessel) ) { + logbk.this.vessel$FT_REF <- factor(LE_MIDTIME) # init + levels(logbk.this.vessel$FT_REF) <- 1:length(logbk.this.vessel$FT_REF) # assign a FT_REF code + } # only if FT_REF is actually not already informed + + + + + #=> LOGBOOK (EFLALO) INPUT REQUIRES AT LEAST, + # 'VE_REF', FT_DDAT, FT_DTIME, FT_LDAT, FT_LTIME, FT_CDAT, + # 'LE_SP_KG' (etc.), 'LE_RECT', 'VE_FLT' AND 'LE_MET_level6', 'LE_GEAR' COLUMNS + # + + #---------- + #---------- + #---------- + #---------- + #---------- + #---------- + # VMS INPUT: load traj with 'at sea' pings 
SI_STATE informed + # ABSOLUTELY REQUIRED: c("VE_REF","SI_LATI","SI_LONG", "SI_DATE", "SI_TIME", "SI_FT", "SI_HARB", "SI_STATE") + + + if(a.vesselid %in% unique(tacsat$VE_REF)){ + + tacsat.this.vessel <- tacsat[tacsat$VE_REF == a.vesselid,] # subset for this vessel + tacsat.this.vessel$VE_REF <- factor(tacsat.this.vessel$VE_REF) + + + # if does not exist, add SI_DATIM for handling the time in R + tacsat.this.vessel$SI_TIME <- as.character(tacsat.this.vessel$SI_TIME) + tacsat.this.vessel[tacsat.this.vessel$SI_TIME=="24:00", "SI_TIME"] <- "00:00" # debug + tacsat.this.vessel[tacsat.this.vessel$SI_DATE=="29/02/1801", "SI_DATE"] <- "01/03/1801" # debug + tacsat.this.vessel$SI_DATIM <- as.POSIXct( paste(tacsat.this.vessel$SI_DATE, tacsat.this.vessel$SI_TIME) , + tz='GMT', "%d/%m/%Y %H:%M" ) + tacsat.this.vessel <- tacsat.this.vessel[!is.na(tacsat.this.vessel$SI_DATIM),] # debug e.g. when 29/02/1801 + + + # keep only the essential + vms.this.vessel <- tacsat.this.vessel [, c("VE_REF","SI_LATI","SI_LONG", + "SI_DATIM","SI_FT", "SI_SP", "SI_HE", "SI_HARB", "SI_STATE")] + rm(tacsat.this.vessel); gc(reset=TRUE) + vms.this.vessel$VE_REF <- factor(vms.this.vessel$VE_REF) + + vms.this.vessel$idx <- 1:nrow(vms.this.vessel) # label for each ping + + + + + # filter if vessel with a bad vms + to.remove.because.deficient.vms <- any(is.na(vms.this.vessel$SI_FT)) + to.remove.because.not.enough.vms.trips <- length(unique(vms.this.vessel$SI_FT))< 2 # nb vms trips < 2 + if(length(unique(vms.this.vessel$SI_FT))<2) warning('need more than 1 trip in SI_FT') + + # filter if vessel with a bad logbook + to.remove.because.pble.lgbk <- length(unique(logbk.this.vessel$FT_REF))< 2 # nb logbk trips < 2 + + # then... 
+ a.flag <- to.remove.because.deficient.vms || to.remove.because.not.enough.vms.trips || to.remove.because.pble.lgbk + + ## remove FT_REF and SI_MIDTIME if it exists + vms.this.vessel <- vms.this.vessel[, !colnames(vms.this.vessel) %in% c("FT_REF", "SI_MIDTIME")] + + + + if(a.flag==FALSE) { # i.e. vms-equipped + + if(all(is.na(vms.this.vessel$SI_STATE)) && general$detectFishing==FALSE) + stop('the SI_STATE column has to be informed before making the merging') + if(all(is.na(logbk.this.vessel$VE_FLT))) + stop('the VE_FLT column has to be informed before making the merging') + + # alias + .logbk <- logbk.this.vessel + .vms <- vms.this.vessel + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + #! DO THE LINK - APPROACH 1 #!#!!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + NIELS <- FALSE + if(NIELS){ + eftim <- .logbk[which(duplicated(.logbk$FT_REF)==FALSE),c("LE_DTIME","LE_LTIME","FT_REF")] + dtime <- eftim[,1] + ltime <- eftim[,2] + stime <- .vms$SI_DATIM + tripn <- eftim[,3] + + + + smdtime <- t(outer(stime,dtime,"-")) + gtltime <- outer(ltime,stime,"-") + + #-Find first point where tacsat time is greater or equal to departure time and smaller than arrival time + st <- apply(smdtime,1,function(x){which(x>=0)[1]}) + en <- apply(gtltime,1,function(x){rev(which(x>=0))[1]}) + + #-Make sure that values are within the interval of departure and arrival time + subse <- which(is.na(st <= en) == FALSE & (st <= en) == TRUE) + + st <- st[subse] + en <- en[subse] + + #-Assign Tacsat data with FT_REF from Eflalo2 dataset where they link + + if(length(st)!=1){ + + idx <- unlist(mapply(seq,st,en,SIMPLIFY=FALSE)) + reps <- unlist(lapply(mapply(seq,st,en,SIMPLIFY=FALSE),length)) + .vms$FT_REF <- 0 + .vms$FT_REF[idx] <- rep(tripn[subse],reps) + } + if(length(st)==1){ + .vms$FT_REF <- 0 + .vms$FT_REF[seq(st,en)] <- rep(tripn[subse],length(seq(st,en))) + } + if(length(st)==0){ + + .vms$FT_REF <- 0 + } + + + } 
# end NIELS + + FRANCOIS <- TRUE + if(FRANCOIS){ + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + #! DO THE LINK - APPROACH 2 #!#!!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + + #!!!!!!!!!!!!!!!!!!# + #!!!!!!!!!!!!!!!!!!# + # -If IT DOES NOT EXIST YET-, + # FIND THE MID-TIME OF VMS TRIPS + if(any(colnames(.vms)%in%"SI_DATIM")){ + if(!any(colnames(.vms)%in%"SI_DTIME")){ + # find and add the first point of each trip + .vms$start.trip <- c(1,diff(.vms[,"SI_FT"])) + .vms$end.trip <- c(diff(.vms[,"SI_FT"]), .vms[nrow(.vms),"SI_FT"]) + .vms[.vms$start.trip>0, "start.trip"] <- .vms[.vms$start.trip>0, "SI_FT"] + .vms[.vms$end.trip>0, "end.trip"] <- .vms[.vms$end.trip>0, "SI_FT"] + + tmp <- .vms[.vms$start.trip>0,] + tmp <- tmp[,c("VE_REF","SI_DATIM","SI_FT")] + tmp2 <- .vms[.vms$end.trip>0,] + tmp2 <- tmp2[,c("VE_REF","SI_DATIM","SI_FT")] + .vms <- .vms[,!colnames(.vms) %in% c("start.trip", "end.trip")] # remove tool columns + table.midtime <- merge(tmp, tmp2, by.x="SI_FT", by.y="SI_FT") + table.midtime <- table.midtime[, c("SI_FT","VE_REF.x","SI_DATIM.x","SI_DATIM.y") ] + colnames(table.midtime) <- c("SI_FT","VE_REF","SI_DTIME","SI_ATIME") + } else{ + table.midtime <- .vms[, c("SI_FT","VE_REF","SI_DTIME","SI_ATIME") ] + table.midtime <- table.midtime[!duplicated(data.frame(table.midtime$SI_FT, table.midtime$VE_REF)),] + } + } else{stop("no 'SI_DATIM' found in vms")} + SI_MIDTIME <- rep(0, nrow(table.midtime)) + + for(r in 1: nrow(table.midtime)){ + SI_MIDTIME[r] <- as.character(seq(from=table.midtime$SI_DTIME[r], to=table.midtime$SI_ATIME[r], length.out = 3)[2]) + + } + table.midtime$SI_MIDTIME <- SI_MIDTIME + if(!any(colnames(.vms)%in%"SI_MIDTIME")){ # here we are... 
+ .vms <- merge(.vms, table.midtime[,c("SI_FT","SI_MIDTIME")], by.x="SI_FT", by.y="SI_FT") + } + + + + #!!!!!!!!!!!!!!!!!!# + #!!!!!!!!!!!!!!!!!!# + # ASSIGN A 'BK.TRIPNUM' FROM LOGBOOK TO EACH VMS TRIP + trunk <-1 # trunk give the part of the year to be plotted (1 to 5) + # visual check + if(general$visual.check){ + windows(width=8, height=4) + ltrunk <- (nrow(table.midtime)/5) + idxtrunk <- (trunk+(trunk-1)*ltrunk):(trunk*ltrunk) + # plot(table.midtime$SI_DTIME[idxtrunk],rep(1,length(table.midtime$SI_DTIME[idxtrunk])), + plot(table.midtime$SI_DTIME, rep(1,length(table.midtime$SI_DTIME)), + ylim=c(0,0.52), type="n", ylab="", axes=FALSE) + r <- as.POSIXct(round(range(table.midtime$SI_DTIME), "days")) + axis.POSIXct(1, at=seq(r[1], r[2], by="month"), format="%e%b%y:%H:%M") + axis(2, at=c(0.5,0.1),labels=c("VMS","LOGBOOK")) + + for(i in 1:nrow(table.midtime)) { + segments(as.POSIXct(table.midtime$SI_DTIME[i], tz='GMT'), 0.5, as.POSIXct(table.midtime$SI_ATIME[i], tz='GMT'), 0.5, col=1) + points(as.POSIXct(table.midtime$SI_MIDTIME[i], tz='GMT'), 0.5, col=1) + text(as.POSIXct(table.midtime$SI_MIDTIME[i], tz='GMT'), 0.52, table.midtime$SI_FT[i], cex=0.5, col=1) + + } + + tmp <- .logbk[, c("LE_DTIME","LE_LTIME", "LE_MIDTIME", "FT_REF")] + tmp <- tmp[!duplicated(tmp$LE_MIDTIME), ] + for(i in 1:nrow(tmp)){ + segments(as.POSIXct(tmp$LE_DTIME[i], tz='GMT'), 0.1, as.POSIXct(tmp$LE_LTIME[i], tz='GMT'), 0.1, col=1) + points(as.POSIXct(tmp$LE_MIDTIME[i], tz='GMT'), 0.1, col=1) + text(as.POSIXct(tmp$LE_MIDTIME[i], tz='GMT'), 0.0785, tmp$FT_REF[i], cex=0.5, col=1) + } + } + + + # THE CORE CODE: compare bk$LE_MIDTIME and vms$SI_MIDTIME + # find the nearest bk$LE_MIDTIME for each vms$SI_MIDTIME + # and then change levels + # (so, for each mid.time in vms, a FT_REF will be find) + # (so, no lines in vms without a FT_REF from bk...) 
+ fa1 <- levels(factor(.vms$SI_MIDTIME)) + new.levels <- fa1 + fa2 <- levels(factor(.logbk$LE_MIDTIME)) + for(i in 1:length(fa1)) { # for each level in vms + tmp <- abs(as.numeric( as.POSIXct(fa2, tz='GMT') - as.POSIXct(fa1, tz='GMT')[i] )) + if(all(is.na(tmp))) tmp <- abs(as.numeric( as.Date(fa2) - as.Date(fa1)[i] )) # debug the R bug in case of mid-time at 00:00 hour + new.levels[i] <- fa2 [which(tmp == min(tmp, na.rm=TRUE) )] # find the nearest level in logbook + } + .vms$SI_MIDTIME <- factor(as.character(.vms$SI_MIDTIME)) + sauv <- .vms$SI_MIDTIME + levels(.vms$SI_MIDTIME) <- new.levels # and change mid.time in vms to force the merging + + # finally, replace levels by the FT_REF + tmp <- .logbk[.logbk$LE_MIDTIME %in% .vms$SI_MIDTIME , c("FT_REF","LE_MIDTIME")] + tmp2 <- tmp[!duplicated(tmp$FT_REF),] + idx <- match( levels(.vms$SI_MIDTIME), tmp2$LE_MIDTIME ) + .vms$FT_REF <- .vms$SI_MIDTIME # init + levels(.vms$FT_REF) <- as.character(tmp2$FT_REF ) [idx] + + + if(general$visual.check){ + for(i in 1: nrow(.vms)) { + arrows(as.POSIXct( sauv[i]), 0.5 ,as.POSIXct( .vms$SI_MIDTIME[i], tz='GMT'),0.1, length=0.1) + } + } + + if(general$visual.check){ + ve <- as.character(.logbk$VE_REF[1]) + savePlot(filename = file.path(general$output.path, + paste("assign_eflalo_tripnum_to_vms_",ve,"_",general$a.year,".jpeg",sep="")),type ="jpeg") + dev.off() + } + + + ## ADD A WARNING IN CASE OF LONG (UNREALISTIC) TRIPS ## + diff.date <- table.midtime$SI_ATIME - table.midtime$SI_DTIME # if at least one trip >30 days + if(attributes(diff.date)$units=="secs") idx <- which((((diff.date)/3600)/24) >30) + if(attributes(diff.date)$units=="hours") idx <- which((((diff.date)/1)/24) >30) + attributes((table.midtime$SI_ATIME - table.midtime$SI_DTIME )) + if (length( idx) >0){ + cat(paste("at least one vms trip > 30 days detected! 
check harbours...", "\n", sep="")) + suspicious <- .vms[.vms$SI_FT %in% table.midtime$SI_FT[idx] ,] + tmp <- table(suspicious$SI_LATI) + lat.suspicious <- names(tmp[tmp>5]) + if(length(lat.suspicious)!=0) cat(paste("potential harbour likely near lat ",lat.suspicious,"\n",sep="")) + tmp <- table(suspicious$SI_LONG) + long.suspicious <- names(tmp[tmp>5]) + if(length(long.suspicious)!=0) cat(paste("potential harbour likely near long ",long.suspicious,"\n",sep="")) + } # if at least one trip >30 days + rm(table.midtime) ; gc(reset=TRUE) + + + + .logbk$LE_MIDTIME <- factor(.logbk$LE_MIDTIME) + .logbk$FT_REF <- factor(.logbk$FT_REF) + .vms$SI_MIDTIME <- factor(.vms$SI_MIDTIME) + .vms$FT_REF <- factor(.vms$FT_REF) + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + #! ASSIGN A 'SI_FT' FROM VMS TRIP NUM TO #!#!#!# + #! LOGBOOK TRIPS WITH NO VMS CORRESPONDANCE #!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + dep.bk.not.in.vms <- unique( .logbk$FT_REF [ !( .logbk$FT_REF %in% .vms$FT_REF ) ] ) + if(length(dep.bk.not.in.vms)!=0){ + # bk tripnum from dep not in vms + idx <- .logbk$FT_REF %in% dep.bk.not.in.vms + bk <- .logbk[idx,] [order(.logbk[idx,]$LE_DTIME),] + if(!"SI_MIDTIME" %in% colnames(.vms)){ + vms <- .vms [order(.vms$SI_DTIME),] + SI_MIDTIME <- rep(NA, nrow(vms)) + for(r in 1: nrow(vms)){ + SI_MIDTIME[r] <- as.character(seq(from=vms$SI_DTIME[r], to=vms$SI_ATIME[r], length.out = 3)[2]) + } + vms$SI_MIDTIME <- SI_MIDTIME + } else{ vms <- .vms[order(.vms$SI_MIDTIME),]} + #1- compare bk$mid.time and vms$mid.time + # find the nearest vms$mid.time for each bk$mid.time + # and then change levels + # (so for each mid.time in bk, a tripnum will be find) + # (so no lines in bk without a tripnum...) 
+ fa1 <- levels(factor(bk$LE_MIDTIME)) + new.levels <- fa1 + fa2 <- levels(factor(vms$SI_MIDTIME)) + for(i in 1:length(fa1)) { # for each level in logbk + tmp <- abs(as.numeric( as.POSIXct(fa2, tz='GMT') - as.POSIXct(fa1, tz='GMT')[i] )) + new.levels[i] <- fa2 [which(tmp == min(tmp, na.rm=TRUE) )] # find the nearest level in vms + } + bk$LE_MIDTIME <- factor(as.character(bk$LE_MIDTIME)) + levels(bk$LE_MIDTIME) <- new.levels # and change mid.time in logbk to force the merging + + # finally, replace levels by the tripnum + # (note: a same FT_REF in vms can have different mid.time + # due to the first merging of vms to logbk in the vms analysis) + tmp <- vms[vms$SI_MIDTIME %in% bk$LE_MIDTIME , c("FT_REF","SI_MIDTIME")] + tmp2 <- tmp[!duplicated(data.frame(tmp$FT_REF, tmp$SI_MIDTIME)),] + idx2 <- match(levels(bk$LE_MIDTIME), tmp2$SI_MIDTIME) + bk$FT_REF <- bk$LE_MIDTIME # init + levels(bk$FT_REF) <- as.character(tmp2$FT_REF) [idx2] + + # output + bk$LE_MIDTIME <- as.character(bk$LE_MIDTIME) + bk$FT_REF <- as.character(bk$FT_REF) + .logbk[idx,][order(.logbk[idx,]$LE_DTIME),] <- bk + } + + + } # end FRANCOIS + + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # ASSIGN A RECTANGLE TO EACH PING #!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + .vms <- assignPointsToSpatialGrid(xx=.vms) + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # COMPUTE EFFORT.MINS !#!#!#!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # vms + .vms <- .vms[order(.vms$SI_DATIM),] + .vms$LE_EFF_VMS <- abs(c(0, as.numeric(.vms[-nrow(.vms),"SI_DATIM"] - + .vms[-1,"SI_DATIM"], units="mins"))) + start.trip <- c(1,diff(.vms[,"SI_FT"])) + .vms[start.trip!=0, "LE_EFF_VMS"] <- 0 # just correct for the trip change points + # logbook (start/end of trip)- caution: will be repeated per ping in the merged output + #SI_DATIM <- as.POSIXct(paste(.logbk$FT_DDAT,.logbk$FT_DTIME,sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + #SI_DATIM2 <- 
as.POSIXct(paste(.logbk$FT_LDAT,.logbk$FT_LTIME,sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + #.logbk$LE_EFF <- an(difftime(SI_DATIM2,SI_DATIM,units="mins")) + + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # ASSIGN FISHING/NON-FISHING (optional)!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + if(general$detectFishing && general$speed=="segment") { + ## add a gear form tacsat from the logbook info (after the first merging) + ## because the assignement of a state is gear-specific. + ## caution here: we assume only one gear used inside a trip... + # because note that we remove 'logevent' and keep only one duplicate of tripnum + .vms$LE_GEAR <- factor(.vms$FT_REF) # init + tmp <- .logbk[,c("LE_GEAR","FT_REF")] + tmp <- tmp[!duplicated(tmp$FT_REF),] #remove logevent and keep only one duplicate of tripnum + tmp <- tmp[tmp$FT_REF %in% unique(.vms$LE_GEAR),] + idx <- match(levels(.vms$LE_GEAR), as.character(tmp$FT_REF)) + dd <- as.character(tmp$LE_GEAR) [idx] + dd <- replace(dd, is.na(dd), "UKN") # unknown because not matched if Niels + levels(.vms$LE_GEAR) <- dd + + # then do the assignement of the state + # according to a segmented regression on the speed histogram + .vms <- segmentTacsatSpeed ( + tacsat=.vms, + vessels=a.vesselid, + force.lower.bound=0.5, # to do: create an arg in the parent function instead... + gears.to.force= c('GNS','GND','GNC','GNF','GTR','GTN','GEN','GN','SDN','SSC'), # to do: create an arg in the parent function instead... 
+ general=list(a.year=general$a.year, + output.path=general$output.path, + what.speed=general$what.speed, + visual.check=TRUE + ) + ) + #=> (semi)automatic detection of the fishing peak + # (put here because the LE_GEAR need to be informed) + # alternatively, + if(FALSE){ + .vms$SI_STATE <- 0 + .vms <- segmentedTacsatSpeed(.vms[.vms$VE_REF==a.vesselid,], units="year", analyse.by="LE_GEAR", + speed="instantaneous", logfit=FALSE, CI=0.95) + .vms$SI_STATE <- .vms$SI_STATE+1 # back compatibility with mergeEflalo2Pings() i.e. 1: fishing, 2: steaming + # ...but what about the passive gears then? + } + + + .vms <- .vms[, !colnames(.vms) %in% "LE_GEAR"] # remove after use to avoid future conflict. + } + # some alternatives TO DO: + #if(general$detectFishing && general$speed=="lookuptable") + # .vms <- lookupSpeedTacsat (tacsat=.vms, vessels=a.vesselid) + #if(general$detectFishing && general$speed=="bayesian") + # .vms <- bayesianFiltering (tacsat=.vms, vessels=a.vesselid) + + + + rm(er); rm(xx) ; gc(reset=TRUE) + + + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # MERGING WITH VMS PER TRIP !!!!!!!!!!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + do.merging <- function(method="FT_REF", .logbk, .vms, general){ + + # IF BY PING------------- + # find total nb of FISHING ping per tripnum from vms # used for method 1 'FT_REF' + if(method=="FT_REF"){ + # .vms$count.fping.trip <- factor(.vms$FT_REF) # init + # count.fping.trip <- table(.vms[.vms$SI_STATE==1,]$FT_REF) + # # => COUNT nb of FISHING pings per FT_REF because each weight will be repeated by ping after merging + # levels(.vms$count.fping.trip) <- count.fping.trip[levels(.vms$count.fping.trip)] # mapping + # .vms[.vms$SI_STATE==2,]$count.fping.trip <- NA + + # => COUNT nb of FISHING pings per FT_REF because each weight will be repeated by ping after merging + if(any(.vms$SI_STATE==1)){ + .vms$count.fping.trip <- factor(.vms$FT_REF) # init + countp <- countPings(~VE_REF+FT_REF, 
.vms[.vms$SI_STATE=="1",]) + rownames(countp) <- countp$FT_REF + levels(.vms$count.fping.trip) <- countp[levels(.vms$count.fping.trip),"pings"] # mapping + if(any(.vms$SI_STATE %in% 2)) .vms[.vms$SI_STATE==2,]$count.fping.trip <- NA + } else{.vms$count.fping.trip <- NA} + + + # => COUNT nb of gears per FT_REF because each ping will be repeated by gear after merging + count.gr.trip <- tapply(.logbk$LE_GEAR, .logbk$FT_REF, function(x) length(unique(x))) + .logbk$count.gr.trip <- count.gr.trip[.logbk$FT_REF] # mapping + + } + + + # find total nb of FISHING ping per trip-icessquare from vms # used for method 2 'FT_REF_SQ' + if(method=="FT_REF_SQ"){ + # .vms$count.fping.trip.sq <- factor(.vms$FT_REF_SQ) # init + # count.fping.trip.sq <- table(.vms[.vms$SI_STATE==1,]$FT_REF_SQ) # COUNT nb of FISHING pings per FT_REF_SQ + # levels(.vms$count.fping.trip.sq) <- count.fping.trip.sq[levels(.vms$count.fping.trip.sq)] # mapping + # if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq <- NA + + if(any(.vms$SI_STATE==1)){ + .vms$count.fping.trip.sq <- factor(.vms$FT_REF_SQ) # init + countp <- countPings(~VE_REF+SI_RECT+FT_REF, .vms[.vms$SI_STATE=="1",]) + rownames(countp) <- interaction(countp$FT_REF,countp$SI_RECT) + levels(.vms$count.fping.trip.sq ) <- countp[levels(.vms$count.fping.trip.sq),"pings"] # mapping + if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq <- NA + } else{.vms$count.fping.trip.sq <- NA} + + + # => COUNT nb of gears per FT_REF_SQ because each ping will be repeated by gear after merging + count.gr.trip.sq <- tapply(.logbk$LE_GEAR, .logbk$FT_REF_SQ, function(x) length(unique(x))) + .logbk$count.gr.trip.sq <- count.gr.trip.sq[.logbk$FT_REF_SQ] # mapping + } + + + # find total nb of FISHING ping per trip-icessquare-day from vms # used for method 3 'FT_REF_SQ_DAY' + if(method=="FT_REF_SQ_DAY"){ +# .vms$count.fping.trip.sq.day <- factor(.vms$FT_REF_SQ_DAY) # init +# count.fping.trip.sq.day <- 
table(.vms[.vms$SI_STATE==1,]$FT_REF_SQ_DAY) # COUNT nb of FISHING pings per FT_REF_SQ_DAY +# levels(.vms$count.fping.trip.sq.day) <- count.fping.trip.sq.day[levels(.vms$count.fping.trip.sq.day)] # mapping +# if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq.day <- NA + + + if(any(.vms$SI_STATE==1)){ + .vms$count.fping.trip.sq.day <- factor(.vms$FT_REF_SQ_DAY) # init + countp <- countPings(~VE_REF+day+SI_RECT+FT_REF, .vms[.vms$SI_STATE=="1",]) + rownames(countp) <- interaction(countp$FT_REF, countp$SI_RECT, countp$SI_DAY) + levels(.vms$count.fping.trip.sq.day) <- countp[levels(.vms$count.fping.trip.sq.day),"pings"] # mapping + if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq.day <- NA + } else{.vms$count.fping.trip.sq.day <- NA} + + + # => COUNT nb of gears per FT_REF_SQ_DAY because each ping will be repeated by gear after merging + count.gr.trip.sq.day <- tapply(.logbk$LE_GEAR, .logbk$FT_REF_SQ_DAY, function(x) length(unique(x))) + .logbk$count.gr.trip.sq.day <- count.gr.trip.sq.day[.logbk$FT_REF_SQ_DAY] # mapping} + } + + + + # do the merging between .logbk and .vms according to + # meth1: 'FT_REF' OR meth2: 'FT_REF_SQ' OR meth3: 'FT_REF_SQ_DAY' + # need to use a trick to avoid "out of memory" doing the merge() + coln.idx1 <- which(!colnames(.logbk)%in%c("VE_REF", method)) + coln1 <- colnames(.logbk)[coln.idx1] + tmp1 <- data.frame(coll= collapse.all.columns (.logbk, columns= coln.idx1 ), + VE_REF=.logbk$VE_REF, a.method= .logbk[,method] ) #.logbk + coln.idx2 <- which(!colnames(.vms)%in%c("VE_REF", method)) + coln2 <- colnames(.vms)[coln.idx2] + tmp2 <- data.frame(coll2= collapse.all.columns (.vms, columns= coln.idx2 ), + VE_REF=.vms$VE_REF, a.method= .vms[,method] ) #.vms + tmp1[,"a.method"] <- factor(tmp1[,"a.method"] ) + tmp2[,"a.method"] <- factor(tmp2[,"a.method"] ) + + merged.this.vessel <- merge(tmp1, tmp2, all.x=TRUE, all.y=TRUE, suffixes = c(".bk",".vms")) + #=> so, with all.y = TRUE, 
the vms records without corresponding logbk records are kept and NA are produced on the logbook part + #=> so, with all.x = TRUE, the logbk records without corresponding vms records are kept and NA are produced on the vms part + merged.this.vessel$coll <- replace(as.character(merged.this.vessel$coll),is.na(merged.this.vessel$coll), paste(rep("NA",length(coln1)),collapse="#")) + merged.this.vessel$coll <- factor(merged.this.vessel$coll) + #=> adapt 'coll' to get a vector of NA (NA in case of 'in vms but not in logbook') + merged.this.vessel$coll2 <- replace(as.character(merged.this.vessel$coll2),is.na(merged.this.vessel$coll2), paste(rep("NA",length(coln2)),collapse="#")) + # adapt 'coll2' to get a vector of NA (NA in case of 'in logbook but not in vms') + merged.this.vessel$coll2 <- factor(merged.this.vessel$coll2) + colnames(merged.this.vessel)[colnames(merged.this.vessel)%in%"a.method"] <- method + + tmp3 <- uncollapse.column(merged.this.vessel, column="coll") # logbk + tmp4 <- uncollapse.column(merged.this.vessel, column="coll2") # vms + tmp5 <- cbind.data.frame(merged.this.vessel[,c("VE_REF", method)], tmp3, tmp4) + colnames(tmp5) <- c("VE_REF", method, coln1, coln2) + merged.this.vessel <- tmp5 + + # we can choose to correct to keep the land. weight: + # the loss in weight will come from the matching records having catches but + # without fishing pings (i.e. only steaming pings)! + if(is.null(general$conserve.all)) general$conserve.all <- FALSE + if(general$conserve.all){ + # do the conservation of landings anyway? + # detect possible weight landed while no feffort detected from vms + # find FT_REF with some NA + vv<- anf(unique(merged.this.vessel[merged.this.vessel$count.fping.trip=="NA","FT_REF"])) + # then, find FT_REF with at least one no NA + no.vv<- anf(unique(merged.this.vessel[merged.this.vessel$count.fping.trip!="NA","FT_REF"])) + tripnum.all.na.inside <- vv[!vv%in%no.vv] # trip num without at least one count.fping! 
+ # so, deduce loss in weight + zz<- merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside,] + + if(method=="FT_REF"){ + # in this case, reallocate evenly between all pings (caution: including steaming pings) + merged.this.vessel[,"count.fping.trip"] <- anf(merged.this.vessel[,"count.fping.trip"]) + merged.this.vessel$FT_REF <- factor( merged.this.vessel$FT_REF) + nbpings.per.trip <- unlist(lapply(split(merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside,], + merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside,]$FT_REF),nrow)) + merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside, "count.fping.trip"] <- rep(nbpings.per.trip,nbpings.per.trip ) + merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside, "flag"] <- 5 + } + } # end conserve.all + + + + + # apply the catches re-distribution + # method 1, 2 and 3: per ping + # PER PING: + # ASSUMING EQUAL ALLOCATION BETWEEN FISHING PINGS AND GEARS USE INSIDE A SAME TRIP + nm <- names(merged.this.vessel) + idx.col.w <- grep('KG', nm) # index columns with species weight + idx.col.v <- grep('EURO', nm) # index columns with species value + idx.col <- c(idx.col.w, idx.col.v) + if(method=="FT_REF_SQ_DAY"){ + merged.this.vessel[,idx.col] <- (apply(merged.this.vessel[,idx.col],2,anf) / + anf(merged.this.vessel$count.fping.trip.sq.day)) / + anf(merged.this.vessel$count.gr.trip.sq.day) + } + if(method=="FT_REF_SQ"){ + merged.this.vessel[,idx.col] <- (apply(merged.this.vessel[,idx.col],2,anf) / + anf(merged.this.vessel$count.fping.trip.sq)) / + anf(merged.this.vessel$count.gr.trip.sq) + } + if(method=="FT_REF"){ + merged.this.vessel[,idx.col] <- (apply(merged.this.vessel[,idx.col],2,anf) / + anf(merged.this.vessel$count.fping.trip) ) / + anf(merged.this.vessel$count.gr.trip) + } + + + + + return(merged.this.vessel) + } + + + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # SET UP PRIMARY KEYS FOR MERGING!#!#!#!#!#!#!#!# + 
#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + .logbk$FT_REF_SQ_DAY <- factor(paste(.logbk$FT_REF, ".", .logbk$LE_RECT,".", an(format(.logbk$LE_CTIME, '%j')), sep='')) + .vms$FT_REF <- factor(.vms$FT_REF) + .vms$FT_REF_SQ <- factor(paste(.vms$FT_REF, ".", .vms$SI_RECT, sep='')) + .vms$FT_REF_SQ_DAY <- factor(paste(.vms$FT_REF, ".", .vms$SI_RECT,".", an(format(.vms$SI_DATIM, '%j')), sep='')) + + + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # AGGREGATE WEIGHT PER SPECIES !#!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + nm <- names(.logbk) + idx.col.w <- grep('KG', nm) # index columns with species weight + idx.col.v <- grep('EURO', nm) # index columns with species value + idx.col <- c(idx.col.w, idx.col.v) + + DT <- data.table(.logbk) # library data.table for fast grouping replacing aggregate() + # AGGREGATE WEIGHT (OR VALUE) PER SPECIES PER FT_REF_SQ_DAY (NOTE: SO, 'LE_SEQNUM' IS AGGREGATED HERE) + eq1 <- c.listquote(paste("sum(",nm[idx.col],",na.rm=TRUE)",sep="")) + .logbk <- DT[,eval(eq1),by=list(FT_REF_SQ_DAY,VE_REF,VE_FLT,VE_KW,LE_MET_level6,LE_GEAR)] + .logbk <- data.frame(.logbk) + colnames(.logbk) <- c("FT_REF_SQ_DAY","VE_REF","VE_FLT","VE_KW","LE_MET_level6","LE_GEAR",nm[idx.col]) + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # MERGING PROCEDURE CHOICE !#!#!#!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + + my.split <- function(obj,a.sep="\\.",idx=1) unlist(lapply(strsplit(obj, a.sep),function(x)x[idx])) + # reduce the level + .logbk$FT_REF_SQ <- factor(paste(my.split(as.character(.logbk$FT_REF_SQ_DAY),a.sep="\\.",idx=1), + my.split(as.character(.logbk$FT_REF_SQ_DAY),a.sep="\\.",idx=2),sep='.')) + # reduce the level + .logbk$FT_REF <- factor(my.split(as.character(.logbk$FT_REF_SQ),a.sep="\\.",idx=1)) + + # find common keys + tripnum.sq.day.in.vms.and.in.bk <- .vms$FT_REF_SQ_DAY [.vms$FT_REF_SQ_DAY %in% .logbk$FT_REF_SQ_DAY] + tripnum.sq.in.vms.and.in.bk <- .vms$FT_REF_SQ [.vms$FT_REF_SQ 
%in% .logbk$FT_REF_SQ] + .vms.in.bk <- .vms[ .vms$FT_REF_SQ_DAY %in% tripnum.sq.day.in.vms.and.in.bk,] + .vms.in.bk2 <- .vms[ !(.vms$FT_REF_SQ_DAY %in% tripnum.sq.day.in.vms.and.in.bk) & + .vms$FT_REF_SQ %in% tripnum.sq.in.vms.and.in.bk,] + in.bk.and.feffort.not.at.0 <- unique(.vms.in.bk[.vms.in.bk$SI_STATE==1,]$FT_REF_SQ_DAY) + in.bk2.and.feffort.not.at.0 <- unique(.vms.in.bk2[.vms.in.bk2$SI_STATE==1,]$FT_REF_SQ) + + # split .vms and .logbk in three blocks + # vms with good match => go to meth3 + .vms.for.meth3 <- .vms [.vms$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0, ] + # vms with intermediate match => go to meth2 + .vms.for.meth2 <- .vms [!(.vms$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & + (.vms$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] + # vms with bad match => go to meth1 + .vms.for.meth1 <- .vms [!(.vms$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & + !(.vms$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] + # logbk with good match => go to meth3 + .logbk.for.meth3 <- .logbk [.logbk$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0, ] + # logbk with intermediate match => go to meth2 + .logbk.for.meth2 <- .logbk [!(.logbk$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & + (.logbk$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] + # logbk with bad match => go to meth1 + .logbk.for.meth1 <- .logbk [!(.logbk$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & + !(.logbk$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] + + suppressWarnings(rm(merged1, merged2, merged3)) # clear + #!! 
METH1 !!# + if(nrow(.logbk.for.meth1)!=0 && nrow(.vms.for.meth1)!=0 ) { + # remove useless cols and aggregate according to the key 'FT_REF' + .logbk.for.meth1 <- .logbk.for.meth1[, !colnames(.logbk.for.meth1)%in% c("FT_REF_SQ_DAY","FT_REF_SQ")] + nm <- names(.logbk.for.meth1) + idx.col.w <- grep('KG', nm) # index columns with species weight + idx.col.v <- grep('EURO', nm) # index columns with species value + idx.col <- c(idx.col.w, idx.col.v) + # AGGREGATE WEIGHT (OR VALUE) PER SPECIES PER FT_REF + DT <- data.table(.logbk.for.meth1) # library data.table for fast grouping replacing aggregate() + eq1 <- c.listquote(paste("sum(", nm[idx.col],",na.rm=TRUE)",sep="")) + .logbk.for.meth1 <- DT[,eval(eq1),by=list(VE_REF,FT_REF,VE_FLT,VE_KW,LE_MET_level6,LE_GEAR)] + .logbk.for.meth1 <- data.frame(.logbk.for.meth1) + colnames(.logbk.for.meth1) <- c("VE_REF","FT_REF","VE_FLT","VE_KW","LE_MET_level6","LE_GEAR",nm[idx.col]) + # do.merging + merged1 <- do.merging(method="FT_REF", .logbk.for.meth1, .vms.for.meth1, general) + # add meth flag + if("flag" %in% names(merged1) && nrow(merged1[is.na(merged1[,"flag"]),])!=0){ + merged1[is.na(merged1[,"flag"]),"flag"] <- 1 # meth 1 + } else merged1$flag <- 1 + } + #!! 
METH2 !!# + if(nrow(.logbk.for.meth2)!=0 && nrow(.vms.for.meth2)!=0 ) { + # remove useless cols and aggregate according to the key 'FT_REF_SQ' + .logbk.for.meth2 <- .logbk.for.meth2[, !colnames(.logbk.for.meth2)%in% c("FT_REF_SQ_DAY","FT_REF")] + nm <- names(.logbk.for.meth2) + idx.col.w <- grep('KG', nm) # index columns with species weight + idx.col.v <- grep('EURO', nm) # index columns with species value + idx.col <- c(idx.col.w, idx.col.v) + # AGGREGATE WEIGHT (OR VALUE) PER SPECIES PER FT_REF_SQ + DT <- data.table( .logbk.for.meth2) # library data.table for fast grouping replacing aggregate() + eq2 <- c.listquote(paste("sum(",nm[idx.col],",na.rm=TRUE)",sep="")) + .logbk.for.meth2 <- DT[,eval(eq2),by=list(VE_REF,FT_REF_SQ,VE_FLT,VE_KW,LE_MET_level6,LE_GEAR)] + .logbk.for.meth2 <- data.frame(.logbk.for.meth2) + colnames(.logbk.for.meth2) <- c("VE_REF","FT_REF_SQ","VE_FLT","VE_KW","LE_MET_level6","LE_GEAR",nm[idx.col]) + # do.merging + merged2 <- do.merging(method="FT_REF_SQ", .logbk.for.meth2, .vms.for.meth2, general) + # add meth flag + merged2$flag <- 2 # meth 2 + } + #!! METH3 !!# + if(nrow(.logbk.for.meth3)!=0 && nrow(.vms.for.meth3)!=0 ) { + # do.merging + merged3 <- do.merging(method="FT_REF_SQ_DAY", .logbk.for.meth3, .vms.for.meth3, general) + # add meth flag + merged3$flag <- 3 # meth 3 + } + + # bind the three blocks + merged <- NULL ; colnm <- NULL + for(i in 1: 3){ + a.table <- try(get(paste('merged',i,sep='')), silent=TRUE) + if(class(a.table)!="try-error"){ + a.table <- a.table[, !colnames(a.table) %in% + c("count.fping.trip.sq.day","count.fping.trip.sq","count.fping.trip", + "tot.fish.effort.trip","tot.fish.effort.trip.sq", + "count.gr.trip", "count.gr.trip.sq", "count.gr.trip.sq.day", + "FT_REF_SQ", "FT_REF_SQ_DAY")] # remove tool columns + if(i==1) colnm <- colnames(a.table) ; if(is.null(colnm)) colnm <- colnames(a.table) + merged <- rbind.data.frame (merged, a.table[, colnm]) + } + } + + # if still 'not merging' part, retrieve on NA side i.e. 
occurs when pings in vms but not in bk + merged <- retrieveOnBkSide(merged, type.data=c( "VE_FLT","VE_KW","LE_GEAR", "LE_MET_level6")) # i.e. when metier=='NA' + + + # clean up + rm(a.table, merged1, merged2, merged3, merged.this.vessel,.vms, .logbk, logbk.this.vessel, vms.this.vessel) + gc(reset=TRUE) + + # restore tacsat names "%e/%m/%Y %H:%M" + idx <- merged$SI_DATIM!='NA' # NA is possible when bk not in vms because bk.tripnum vms may belong to another block than block1 + merged$SI_DATIM <- as.character(merged$SI_DATIM) + merged$SI_DATE <- NA + merged[idx,"SI_DATE"] <- paste(substr(merged[idx,]$SI_DATIM ,9,10),"/", + substr(merged[idx,]$SI_DATIM , 6,7), "/", substr(merged[idx,]$SI_DATIM ,1,4), sep='') + merged$SI_TIME <- NA + merged[idx,"SI_TIME"] <- paste(substr(merged[idx,]$SI_DATIM , 12,13),":", + substr(merged[idx,]$SI_DATIM , 15,16), sep='') + + # last calculation + merged$KW_HOURS <- anf(merged$VE_KW) * anf(merged$LE_EFF_VMS) /60 + + # order chronologically + merged <- orderBy(~SI_DATIM, merged) + + # last clean up + merged <- merged[, !colnames(merged) %in% c('idx', 'icessquare', "SI_DATIM", "SI_MIDTIME")] + + # save------------ + save("merged", file=file.path(general$output.path, + paste("merged_", a.vesselid,"_",general$a.year,".RData", sep=''))) + cat(paste("save 'merged'...OK\n\n",sep="")) + + + }else{ # end 'a.flag' + cat(paste("failure for",a.vesselid,"(probably not vms-equipped)\n")) + # because no vms for this vessel... + # TO DO: the logbk way + #... + } + }else{ # end try-error + cat(paste("failure for",a.vesselid,"(probably not vms-equipped)\n")) + # because no vms for this vessel... + # TO DO: the logbk way + #... 
+ } + + + + + } # end a.vesselid + + + + + + +return() +} + + + + + + + ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + ##!!!!!MAIN!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + if(FALSE) { + + + #\dontrun{ + data(eflalo) + data(tacsat) + data(euharbours); euharbours <- harbours + + # format + eflalo <- formatEflalo(eflalo) + tacsat <- formatTacsat(tacsat) + + # order tacsat chronologically with library(doBy) + tacsat <- sortTacsat(tacsat) + + # test each ping if in harbour or not + tacsat$SI_HARB <- NA + euharbours$Description <- euharbours$harbour + tacsat$SI_HARB <- pointInHarbour(lon=anf(tacsat$SI_LONG), + lat=anf(tacsat$SI_LATI), + harbours=euharbours, + rowSize=30, returnNames=TRUE) + inHarb <- tacsat$SI_HARB + inHarb <- replace(inHarb, !is.na(inHarb), 1) + inHarb <- replace(inHarb, is.na(inHarb), 0) + inHarb <- as.numeric(inHarb) + + # assign a trip identifier + tacsat$SI_FT <- 1 # init + idx <- which(inHarb==0) + tacsat[idx,"SI_FT"] <- cumsum(inHarb) [idx] # add a SI_FT index + + # keep 'out of harbour' points only + # (but keep the departure point and the arrival point lying in the harbour) + startTrip <- c(diff(tacsat[,"SI_FT"]), 0) + endTrip <- c(0, diff(tacsat[,"SI_FT"])) + tacsat[which(startTrip>0),"SI_FT"] <- tacsat[which(startTrip>0)+1,"SI_FT"] + tacsat[which(endTrip<0),"SI_FT"] <- tacsat[which(endTrip<0)-1,"SI_FT"] + tacsat <- tacsat[which(inHarb==0 | startTrip>0 | endTrip<0),] + + + # assign a state to each ping (here, useless if detectFishing at TRUE) + tacsat$SI_STATE <- 2 # init (1: fishing; 2: steaming) + # fake speed rule for fishing state + tacsat$SI_STATE [(tacsat$SI_SP>4 & tacsat$SI_SP<8)] <-1 + + + # reduce the size of the eflalo data by merging species + # (assuming that the other species is coded MZZ), 
threshold in euros. + eflalo2 <- poolEflaloSpecies (eflalo, threshold=1e6, code="MZZ") + + # debug if eflalo has not been cleaned earlier + eflalo <- eflalo[!eflalo$VE_REF=="NA" &!is.na(eflalo$VE_REF),] + + # an informed VE_FLT is also required + if(all(is.na(eflalo$VE_FLT))) eflalo$VE_FLT <- "fleet1" + + # possible mis-naming mistakes + if(!match('LE_MET_level6',colnames(eflalo))>0){ + eflalo$LE_MET_level6 <- eflalo$LE_MET + } + + # debug + eflalo <- eflalo[eflalo$LE_MET!="No_logbook6",] + + + # TEST FOR A GIVEN SET OF VESSELS + # (if detect.fishing is true then do also detection of fishing activity + # e.g. if speed='segment' the segmentTacsatSpeed() automatic detection of fishing states + # that will overwrite the existing SI_STATE) + mergeEflalo2Pings (eflalo=eflalo, tacsat=tacsat, vessels=c("738", "804"), + general=list(output.path=file.path("C:","output"), + visual.check=TRUE, detectFishing=TRUE, speed="segment", + what.speed="calculated")) + # ...OR APPLY FOR ALL VESSELS IN eflalo + mergeEflalo2Pings (eflalo=eflalo, tacsat=tacsat, + general=list(output.path=file.path("C:","output"), + visual.check=TRUE, detectFishing=TRUE, speed="segment", + what.speed="calculated")) + gc(reset=TRUE) + + # load the merged output table for one vessel + load(file.path("C:","output","merged_804_1800.RData")) + + # check the conservation of landings + sum(tapply(anf(merged$LE_KG_PLE), merged$flag, sum, na.rm=TRUE)) + sum(eflalo[eflalo$VE_REF=="804","LE_KG_PLE"], na.rm=TRUE) + + + # ...or bind all vessels (keeping only some given species here) + bindAllMergedTables (vessels=c("738", "804"), a.year = "1800", + species.to.keep=c("PLE","COD"), + folder = file.path("C:","output"), + all.in.one.table=TRUE) + + # ...and load the merged output table for all vessels + load(file.path("C:","output","all_merged__1800.RData")) + + # map landing of cod from all studied vessels + # ( with debugging if tacsat has not been cleaned earlier) + graphics.off() + df1<- all.merged[, 
c("SI_LATI","SI_LONG","LE_KG_COD")] + df1$SI_LONG <- anf(df1$SI_LONG) + df1$SI_LATI <- anf(df1$SI_LATI) + df1 <- df1[ !is.na(df1$SI_LATI),] + df1 <- df1[ !is.na(df1$SI_LONG),] + vmsGridCreate(df1,nameLon="SI_LONG", nameLat="SI_LATI", + nameVarToSum = "LE_KG_COD", cellsizeX =0.1, + cellsizeY =0.05, legendtitle = "COD landings (kg)") + + # but you need to remove steaming points before gridding! + df2<-df1[-which(is.na(df1$LE_KG_COD)),] + vmsGridCreate(df2,nameLon="SI_LONG",nameLat="SI_LATI", we = 3, ea = 6, so = 50, no = 54, + nameVarToSum = "LE_KG_COD",cellsizeX =0.1, + cellsizeY =0.05, legendtitle = "COD landings (kg)", plotPoints =TRUE, + breaks0=c(1,2,4,8,16,32,64,100000)) + + + + # CONVERT TO FISHFRAME FORMAT (might take some time running) + # (by default, this will keep all the species in the output table) + tmp <- bindAllMergedTables (vessels= unique(tacsat$VE_REF), + species.to.keep=character(), + folder = file.path("C:","output"), + all.in.one.table=FALSE) + + ff <- pings2Fishframe (general=list(output.path=file.path("C:","output"), + a.year=1800, a.country="NLD", degree=0.05 ) ) + + + # TO DO.... + # Use the interpolation routine to improve the location of the effort + #all.merged$SI_SP <- as.numeric(as.character( all.merged$SI_SP)) + #all.merged$SI_HE <- as.numeric(as.character( all.merged$SI_HE)) + #all.merged$SI_LONG <-as.numeric(as.character(all.merged$SI_LONG)) + #all.merged$SI_LATI <-as.numeric(as.character(all.merged$SI_LATI)) + #interpolations <- interpolateTacsat( all.merged [,c("VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE")] + # ,interval=120 + # ,margin=12 + # ,res=100 + # ,method="cHs" + # ,params=list(fm=0.5,distscale=20,sigline=0.2,st=c(2,6)) + # ,headingAdjustment=0 + # ) + #interpolationsED <- equalDistance(interpolations,res=10) + # make sure that the 'res' statement in the interpolateTacsat is significantly bigger + # than the 'res' statement in the equalDistance function. + + # then map again... 
+ #vmsGridCreate(interpolationsED,nameLon="SI_LONG",nameLat="SI_LATI", + # cellsizeX =0.05, cellsizeY =0.05, legendtitle = "landings (kg)") + + + + #} + +} # end main diff --git a/vmstools/R/nestedGrid.r b/vmstools/R/nestedGrid.r index 0c670b0..326c10a 100644 --- a/vmstools/R/nestedGrid.r +++ b/vmstools/R/nestedGrid.r @@ -1,85 +1,184 @@ -nestedGrid <- function(tacsat,resx,resy,maxstep = 10, n = 20,control=list(clm=NULL,FUN=NULL)){ - lon <- tacsat$SI_LONG; lat <- tacsat$SI_LATI - poly_wkt <- rep(NA,length(lon)) - idx <- 1:length(lon) - i <- 1 - while(i <= maxstep & length(idx) > 0){ - x <- 2^floor((i-1)/2) - y <- 2^floor((i)/2) - poly <- polyDef(lon[idx], lat[idx], resx/x, resy/y) - count <- table(poly) - - idxin <- which(poly %in% names(count[which(count >= n)])) - idxout <- which(poly %in% names(count[which(count < n)])) - poly_wkt[idx[idxout]] <- poly[idxout] - - #- Final rounds - if(i == maxstep){ - idxout <- 1:length(poly) - poly_wkt[idx[idxout]] <- poly[idxout] - } - - idx <- idx[idxin] - i <- i + 1 - } - - #- Create SpatialPolygons - uniquePols <- poly_wkt[!duplicated(poly_wkt)] - Pol <- lapply(as.list(1:length(uniquePols)), - function(i){eval(parse(text=uniquePols[i]))}) - Pols <- lapply(as.list(1:length(uniquePols)),function(x){Polygons(list(Pol[[x]]),ID=x)}) - SP <- SpatialPolygons(Pols) - SPDF <- as(SP,"SpatialPolygonsDataFrame") - - #- Call the column to aggregate over - if(is.null(control$clm)==TRUE){ - tacsat$count <- rep(1,nrow(tacsat)) - clm <- "count" - } else { clm <- control$clm } - #- Do the aggregation - tacsat$pol <- poly_wkt - if(is.null(control$FUN)==TRUE){ - agg <- unlist(lapply(as.list(unique(tacsat$pol)),function(x){apply(t(tacsat[[clm]][which(tacsat$pol==x)]),1,sum,na.rm=TRUE)})) - } else { - agg <- unlist(lapply(as.list(unique(tacsat$pol)),function(x){apply(t(tacsat[[clm]][which(tacsat$pol==x)]),1,control$FUN,na.rm=TRUE)})) - } - SPDF@data <- data.frame(agg) - colnames(SPDF@data) <- clm - - return(SPDF) -} - -round2 <- function(x, n){ - 
#' Define a nested grid
#'
#' Assign VMS points to a nested grid so that areas with a high density of
#' points get small grid cells and areas with a low density get larger cells.
#'
#' The algorithm applies a coarse starting grid to the positional data and
#' counts the points in each cell; any cell holding \code{>= n} points is
#' split in two (alternating the x and y direction), and the count/split step
#' is repeated up to \code{maxstep} times.
#'
#' @param tacsat Tacsat dataframe (needs \code{SI_LONG} and \code{SI_LATI}).
#' @param resx Gridcell size in degrees in the longitude / x direction.
#' @param resy Gridcell size in degrees in the latitude / y direction.
#' @param maxstep Maximum number of times a grid cell may be split.
#' @param n A cell is split while it contains \code{>= n} points.
#' @param control List with optional components \code{clm} (name of the tacsat
#'   column to aggregate; default counts points) and \code{FUN} (aggregation
#'   function, default \code{sum}); \code{FUN} is called with \code{na.rm=TRUE}.
#' @return A \code{SpatialPolygonsDataFrame}; the single column of its
#'   \code{data} slot is the aggregate per grid cell.
#' @seealso \code{\link{polyDef}}
#' @export nestedGrid
nestedGrid <- function(tacsat, resx, resy, maxstep = 10, n = 20,
                       control = list(clm = NULL, FUN = NULL)) {
  lon      <- tacsat$SI_LONG
  lat      <- tacsat$SI_LATI
  poly_wkt <- rep(NA, length(lon))
  idx      <- seq_along(lon)            # points still awaiting a final cell
  i        <- 1
  # NOTE: '&&' (scalar, short-circuit) is correct here; original used '&'.
  while (i <= maxstep && length(idx) > 0) {
    # split alternately in the x and y direction: 1,1 -> 1,2 -> 2,2 -> 2,4 ...
    x     <- 2^floor((i - 1) / 2)
    y     <- 2^floor(i / 2)
    poly  <- polyDef(lon[idx], lat[idx], resx / x, resy / y)
    count <- table(poly)

    idxin  <- which(poly %in% names(count[count >= n]))  # dense: split again
    idxout <- which(poly %in% names(count[count < n]))   # sparse: finalize
    poly_wkt[idx[idxout]] <- poly[idxout]

    #- Final round: keep every remaining cell regardless of its count
    if (i == maxstep) {
      idxout <- seq_along(poly)
      poly_wkt[idx[idxout]] <- poly[idxout]
    }

    idx <- idx[idxin]
    i   <- i + 1
  }

  #- Create SpatialPolygons from the unique cell definitions (order preserved)
  uniquePols <- poly_wkt[!duplicated(poly_wkt)]
  Pol  <- lapply(seq_along(uniquePols),
                 function(j) eval(parse(text = uniquePols[j])))
  Pols <- lapply(seq_along(uniquePols),
                 function(j) Polygons(list(Pol[[j]]), ID = j))
  SP   <- SpatialPolygons(Pols)
  SPDF <- as(SP, "SpatialPolygonsDataFrame")

  #- Column to aggregate over; default is a plain point count
  if (is.null(control$clm)) {
    tacsat$count <- rep(1, nrow(tacsat))
    clm <- "count"
  } else {
    clm <- control$clm
  }

  #- Aggregate per cell, in the same order as uniquePols above.
  # (Replaces the original apply(t(x), 1, FUN, na.rm=TRUE), which is just
  # FUN(x, na.rm=TRUE) on a vector.)
  tacsat$pol <- poly_wkt
  aggFUN <- if (is.null(control$FUN)) sum else control$FUN
  agg <- unlist(lapply(as.list(unique(tacsat$pol)),
                       function(p) aggFUN(tacsat[[clm]][tacsat$pol == p],
                                          na.rm = TRUE)))
  SPDF@data <- data.frame(agg)
  colnames(SPDF@data) <- clm

  return(SPDF)
}

#' Round half away from zero
#'
#' Unlike base \code{round()} (round-half-to-even), this always rounds a half
#' away from zero, so cell mid-points are assigned deterministically.
#'
#' @param x Numeric vector.
#' @param n Number of decimal places.
#' @return Numeric vector of \code{x} rounded to \code{n} decimals.
round2 <- function(x, n) {
  posneg <- sign(x)
  z <- trunc(abs(x) * 10^n + 0.5)  # +0.5 then trunc == round half up on |x|
  z <- z / 10^n
  z * posneg
}

#' Define polygon in text string format
#'
#' Snap each (lon, lat) point to the centre of its grid cell (grid origin at
#' (0,0)) and return, per point, an evaluable R expression describing the
#' cell's corner polygon.
#'
#' @param lon Vector with longitude (or x) values.
#' @param lat Vector with latitude (or y) values, same length as \code{lon}.
#' @param gridx Gridcell size in degrees in the longitude / x direction.
#' @param gridy Gridcell size in degrees in the latitude / y direction.
#' @return Character vector, one polygon-definition string per input point.
#' @note Points exactly on a cell border are assigned to the cell away from
#'   the origin (a consequence of round-half-away-from-zero in round2()).
#' @seealso \code{\link{nestedGrid}}
#' @export polyDef
polyDef <- function(lon, lat, gridx, gridy) {
  # round to the nearest rectangle mid-point
  lon1 <- round2((lon - gridx / 2) / gridx, 0) * gridx + gridx / 2
  lat1 <- round2((lat - gridy / 2) / gridy, 0) * gridy + gridy / 2

  # create the WKT-like string from the four cell corners (closed ring)
  paste0("Polygon(cbind(c(",
         lon1 - gridx / 2, ",",
         lon1 + gridx / 2, ",",
         lon1 + gridx / 2, ",",
         lon1 - gridx / 2, ",",
         lon1 - gridx / 2,
         "),c(",
         lat1 - gridy / 2, ",",
         lat1 - gridy / 2, ",",
         lat1 + gridy / 2, ",",
         lat1 + gridy / 2, ",",
         lat1 - gridy / 2,
         ")))")
}
#' Compute fraction of Confidence Interval located outside a maximum range
#'
#' The Confidence Interval surrounding an interpolation depends on the
#' 'sigline' and 'distscale' parameters, which can yield extremely wide or
#' extremely narrow CIs. This function checks which proportion of the CI mass
#' falls inside versus outside the maximum feasible range (an ellipse defined
#' by the elapsed time and vessel speed), plus the maximum CI value inside it.
#'
#' @param int Interpolation, as data.frame from 'interpolateTacsat'.
#' @param tacint Tacsat records (two rows) corresponding with the interpolation.
#' @param params List of parameters used to perform the interpolation.
#' @param grid Object of class 'GridTopology' specifying the grid dimensions.
#' @return A list with three elements, in this order: 1) fraction of CI mass
#'   inside the maximum range (ideally ~1), 2) fraction outside (ideally 0),
#'   3) maximum CI value inside the range (ideally ~1 at the VMS points).
#' @seealso \code{\link{createGrid}}, \code{\link{calculateCI}},
#'   \code{\link{point.in.polygon}}
#' @export outsideMaxRange
outsideMaxRange <- function(int
                            ,tacint
                            ,params
                            ,grid){
  # Rebuild SI_DATIM from the date/time columns when absent.
  # BUGFIX: the original assigned to 'SI_DATIMIM' (typo), so SI_DATIM stayed
  # undefined and the difftime() calls below operated on NULL.
  if (!"SI_DATIM" %in% colnames(tacint))
    tacint$SI_DATIM <- as.POSIXct(paste(tacint$SI_DATE, tacint$SI_TIME,
                                        sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M")

  CI <- calculateCI(int,
                    tacint,
                    params,
                    grid,
                    plot = FALSE)

  # Maximum-range ellipse between the interpolation's start and end points;
  # speed is at least the straight-interpolation average speed (knots).
  mxr <- maxRangeCI(x = c(int[-1, 1][1], rev(int[-1, 1])[1]),
                    y = c(int[-1, 2][1], rev(int[-1, 2])[1]),
                    Time = c(difftime(tacint$SI_DATIM[2], tacint$SI_DATIM[1], units = "mins")),
                    speed = pmax(tacint$SI_SP, rep(distanceInterpolation(list(int)) / 1.852 /
                      c(difftime(tacint$SI_DATIM[2], tacint$SI_DATIM[1], units = "hours")), 2)))

  coords <- coordinates(CI)
  propCI <- point.in.polygon(coords[, 1], coords[, 2], mxr[[1]][, 1], mxr[[1]][, 2])
  # Sum of CI values inside the maximum range, ideally all cells with values
  insideR <- sum(CI@data$data[which(propCI == 1)], na.rm = TRUE) / sum(CI@data$data, na.rm = TRUE)
  # Sum of CI values outside the maximum range, ideally 0
  outsideR <- sum(CI@data$data[which(propCI == 0)], na.rm = TRUE) / sum(CI@data$data, na.rm = TRUE)
  # Top of the CI inside the range, ideally equal to 1
  maxR <- max(CI@data$data[which(propCI == 1)], na.rm = TRUE)
  return(list(insideR, outsideR, maxR))
}
all.merged$quarter==a.quarter & - all.merged$SI_STATE==1, c("SI_LATI","SI_LONG","LE_EFF_VMS")] - df1$SI_LATI <- anf(df1$SI_LATI ) # debug... - df1$SI_LONG <- anf(df1$SI_LONG ) # debug... - df1$LE_EFF_VMS <-anf(df1$LE_EFF_VMS) # debug... - df1 <- df1[ !is.na(df1$SI_LATI),] - df1$LE_EFF_VMS <- anf(df1$LE_EFF_VMS)/ 60 # in hours - if(nrow(df1)!=0){ - vmsGridCreate(df1, nameVarToSum="LE_EFF_VMS", numCats=10, plotPoints =FALSE, legendtitle="fishing (hours)", - colLand="darkolivegreen4", addICESgrid=TRUE, - nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, - breaks0=breaks0 - , legendncol=2) - title(paste(met, "-", a.year, "-", a.quarter) ) - a.met <- gsub(">=", replacement="o",met) # debug - a.met <- gsub("<", replacement="",a.met) # debug - a.met <- gsub(">", replacement="o",a.met)# debug - dir.create(file.path(output, "jpegEffort", "quarter", a.met)) - # save - savePlot(filename = file.path(output, "jpegEffort", "quarter", a.met, - paste("map_effort_hours_merged_vessels_", a.met,"_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") - dev.off() - } - }} - - return() - } - - -# pings2EffortMaps (all.merged=all.merged, output= file.path("C:","VMSanalysis", "FemernBelt"), -# cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, -# breaks0=c(0,25, 50,100,200,400,800,1600, 3200,6400,12800, 100000)) + # Author: F.Bastardie + + +#' generate some fishing effort maps from the merged output table +#' +#' generate some effort maps (jpeg files) from the merged table per year, per +#' metier (level6), per metier-quarter all together +#' +#' using some arguments informing mapGrid(). Performing automatic creation of a +#' hierarchy of output folders. see also pings2LandingsMaps(). 
+#' +#' @param all.merged the data.frame from merged VMS with logbooks +#' @param output a path for the output folder to be created +#' @param cellsizeX additional arguments for mapGrid +#' @param cellsizeY additional arguments for mapGrid +#' @param we additional arguments for mapGrid +#' @param ea additional arguments for mapGrid +#' @param no additional arguments for mapGrid +#' @param so additional arguments for mapGrid +#' @param breaks0 additional arguments for mapGrid +#' @author Francois Bastardie +#' @examples +#' +#' +#' \dontrun{ +#' +#' years= c('1800') +#' +#' for(a.year in years){ +#' +#' graphics.off() +#' +#' # ...and load the merged output table for all vessels +#' load(file.path("C:","output",paste("all_merged__",a.year,".RData",sep=''))) +#' +#' # generate the effort maps (jpeg files) and store in a hierarchy of folders +#' pings2EffortMaps (all.merged=all.merged, output= +#' file.path("C:","output"), +#' cellsizeX =0.1, cellsizeY =0.05, we = 3, ea = 6, so = 50, no = 54, +#' breaks0=c(0,25, 50,100,200,400,800,1600, 3200,6400,12800, 100000)) +#' +#' # generate the landings maps (jpeg) and store in a hierarchy of folders +#' pings2LandingsMaps (all.merged=all.merged, sp="LE_KG_COD", +#' output= file.path("C:","output"), +#' cellsizeX =0.1, cellsizeY =0.05, we = 3, ea = 6, so = 50, no = 54, +#' breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4), +#' 100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000)) +#' +#' } +#' +#' # create some animations from the sequential jpeg files... 
+#' # (need the 'animation' R package and ImageMagik installed) +#' landingsMaps2GIFanim(idir = file.path("C:","output"), spp=c("COD") ) +#' +#' } +#' +#' +#' @export pings2EffortMaps + pings2EffortMaps <- + function (all.merged, output= file.path("C:","VMSanalysis", "FemernBelt"), + cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, + breaks0=c(0,25, 50,100,200,400,800,1600, 3200,6400,12800, 100000)){ + + # add a quarter + cat(paste("add a quarter...can be deadly long!", "\n")) + if(!"quarter" %in% colnames(all.merged)) + all.merged$quarter <- factor(substr(quarters(as.POSIXct(all.merged$SI_DATE)),2,2)) + + # create the general folder + dir.create(file.path(output), recursive = TRUE ) + dir.create(file.path(output, "jpegEffort")) + + # detect the year + a.year <- format(strptime( paste(all.merged$SI_DATE[1]) , tz='GMT', "%e/%m/%Y" ), "%Y") + + + # in overall + cat(paste("overall...", "\n")) + df1 <- all.merged[all.merged$SI_STATE==1, c("SI_LATI","SI_LONG","LE_EFF_VMS")] + df1$SI_LATI <- anf(df1$SI_LATI ) # debug... + df1$SI_LONG <- anf(df1$SI_LONG ) # debug... + df1$LE_EFF_VMS <-anf(df1$LE_EFF_VMS) # debug... 
+ df1 <- df1[ !is.na(df1$SI_LATI),] + df1$LE_EFF_VMS <- anf(df1$LE_EFF_VMS)/ 60 # in hours + vmsGridCreate(df1, nameVarToSum="LE_EFF_VMS", numCats=10, plotPoints =FALSE, legendtitle="fishing (hours)", + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0 + , legendncol=2) + dir.create(file.path(output, "jpegEffort", "overall")) + savePlot(filename = file.path("C:","VMSanalysis", "FemernBelt", "jpegEffort", "overall", + paste("map_effort_hours_merged_vessels_",a.year,".jpeg",sep="")),type ="jpeg") + dev.off() + + + # per quarter + cat(paste("per quarter...", "\n")) + for (a.quarter in levels(all.merged$quarter) ){ + df1 <- all.merged[all.merged$quarter==a.quarter & + all.merged$SI_STATE==1, c("SI_LATI","SI_LONG","LE_EFF_VMS")] + df1$SI_LATI <- anf(df1$SI_LATI ) # debug... + df1$SI_LONG <- anf(df1$SI_LONG ) # debug... + df1$LE_EFF_VMS <-anf(df1$LE_EFF_VMS) # debug... + df1 <- df1[ !is.na(df1$SI_LATI),] + df1$LE_EFF_VMS <- anf(df1$LE_EFF_VMS)/ 60 # in hours + if(nrow(df1)!=0){ + vmsGridCreate(df1, nameVarToSum="LE_EFF_VMS", numCats=10, plotPoints =FALSE, legendtitle="fishing (hours)", + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0 + , legendncol=2) + title(paste(a.year, "-", a.quarter) ) + # create folder and save + dir.create(file.path(output, "jpegEffort", "quarter")) + savePlot(filename = file.path(output, "jpegEffort", "quarter", + paste("map_effort_hours_merged_vessels_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") + dev.off() + } + } + + + # per metier + cat(paste("per metier...", "\n")) + for (met in levels(all.merged$LE_MET_level6) ){ + df1 <- all.merged[all.merged$LE_MET_level6==met & + all.merged$SI_STATE==1, c("SI_LATI","SI_LONG","LE_EFF_VMS")] + df1$SI_LATI <- anf(df1$SI_LATI ) # debug... 
+ df1$SI_LONG <- anf(df1$SI_LONG ) # debug... + df1$LE_EFF_VMS <-anf(df1$LE_EFF_VMS) # debug... + df1 <- df1[ !is.na(df1$SI_LATI),] + df1$LE_EFF_VMS <- anf(df1$LE_EFF_VMS)/ 60 # in hours + if(nrow(df1)!=0){ + vmsGridCreate(df1, nameVarToSum="LE_EFF_VMS", numCats=10, plotPoints =FALSE, legendtitle="fishing (hours)", + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0 + , legendncol=2) + title(paste(met) ) + a.met <- gsub(">=", replacement="o",met) # debug + a.met <- gsub("<", replacement="",a.met) # debug + a.met <- gsub(">", replacement="o",a.met) # debug + # create folders and save + dir.create(file.path(output, "jpegEffort", "overall", a.met)) + savePlot(filename = file.path(output, "jpegEffort","overall", a.met, + paste("map_effort_hours_merged_vessels_", a.met,"_",a.year,".jpeg",sep="")),type ="jpeg") + dev.off() + } + } + + # per metier, quarter + cat(paste("per metier, per quarter...", "\n")) + for (met in levels(all.merged$LE_MET_level6) ){ + for (a.quarter in levels(all.merged$quarter) ){ + + df1 <- all.merged[all.merged$LE_MET_level6==met & + all.merged$quarter==a.quarter & + all.merged$SI_STATE==1, c("SI_LATI","SI_LONG","LE_EFF_VMS")] + df1$SI_LATI <- anf(df1$SI_LATI ) # debug... + df1$SI_LONG <- anf(df1$SI_LONG ) # debug... + df1$LE_EFF_VMS <-anf(df1$LE_EFF_VMS) # debug... 
+ df1 <- df1[ !is.na(df1$SI_LATI),] + df1$LE_EFF_VMS <- anf(df1$LE_EFF_VMS)/ 60 # in hours + if(nrow(df1)!=0){ + vmsGridCreate(df1, nameVarToSum="LE_EFF_VMS", numCats=10, plotPoints =FALSE, legendtitle="fishing (hours)", + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0 + , legendncol=2) + title(paste(met, "-", a.year, "-", a.quarter) ) + a.met <- gsub(">=", replacement="o",met) # debug + a.met <- gsub("<", replacement="",a.met) # debug + a.met <- gsub(">", replacement="o",a.met)# debug + dir.create(file.path(output, "jpegEffort", "quarter", a.met)) + # save + savePlot(filename = file.path(output, "jpegEffort", "quarter", a.met, + paste("map_effort_hours_merged_vessels_", a.met,"_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") + dev.off() + } + }} + + return() + } + + +# pings2EffortMaps (all.merged=all.merged, output= file.path("C:","VMSanalysis", "FemernBelt"), +# cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, +# breaks0=c(0,25, 50,100,200,400,800,1600, 3200,6400,12800, 100000)) diff --git a/vmstools/R/pings2Fishframe.r b/vmstools/R/pings2Fishframe.r index 697119f..2f5b22c 100644 --- a/vmstools/R/pings2Fishframe.r +++ b/vmstools/R/pings2Fishframe.r @@ -1,254 +1,290 @@ - - - pings2Fishframe <- function(general=list(output.path= - file.path("H:","DIFRES","VMSanalysis","results_merged","DKWaters"), - a.year=2009, a.country="DNK", degree=0.05)){ - - - # TO FISHFRAME (author: F. Bastardie) - # create the VE table to upload in fishframe - # required: the data.table package - # optional: the "areas" shape file for ICESarea()(if not provided as arg then loaded from the vmstools library) - mergedTable2FishframeVE <- function (general=list(output.path=file.path("C:","output"), - a.year=2009, a.country="DNK", degree=0.05),...){ - lstargs <- list(...) 
#' convert the merged (VMS + logbooks) data.frame to comply with the fishframe
#' format
#'
#' the merged data.frame coming from mergeEflalo2Pings() can be further
#' converted to be uploaded in the fishframe data warehouse. This includes the
#' aggregation of fishing pings (VMS effort and logbook landings) by CSquare
#' area code and month.
#'
#' The function first starts by searching the input data.frame in the
#' output.path folder. The result will be saved in the output.path folder as
#' well. The 'data.table' package is required to perform the aggregation of the
#' data.frame much faster. An area code is also added (e.g. from the ICESarea()
#' function). This latter step is costly in terms of computation time due to
#' the spatial polygon inclusion test performed for a lot of VMS positions
#' (across a lot of polygons!) and this explains the current poor time
#' performance of the whole function.
#'
#' @param general a list of general settings: \code{output.path} (folder with
#'   the merged .RData inputs, also receiving the outputs), \code{a.year} (a
#'   year to be inserted in the csv), \code{a.country} (a country code to be
#'   inserted in the csv), and \code{degree} (XY grid resolution in degree)
#' @return the bound VE/VSL data.frame, invisibly also written to a csv file
#'   in \code{general$output.path}
#' @author Francois Bastardie
#' @examples
#'
#' \dontrun{
#' data(tacsat)
#' dir.create("C:/output/")
#' # CONVERT TO FISHFRAME FORMAT (might take some time running)
#' # (by default, this will keep all the species in the output table)
#' tmp <- bindAllMergedTables (vessels= unique(tacsat$VE_REF),
#'            species.to.keep=character(),a.year="1800",
#'            folder = file.path("C:","output"), all.in.one.table=FALSE)
#'
#' ff <- pings2Fishframe (general=list(output.path=file.path("C:","output"),
#'                          a.year=1800, a.country="Atlantis") )
#'
#' }
#' @export pings2Fishframe
pings2Fishframe <- function(general = list(output.path =
                              file.path("H:", "DIFRES", "VMSanalysis", "results_merged", "DKWaters"),
                              a.year = 2009, a.country = "DNK", degree = 0.05)) {


  # TO FISHFRAME (author: F. Bastardie)
  # create the VE (effort) table to upload in fishframe
  # required: the data.table package
  # optional: a 'spatialPolygons' argument for ICESarea() (if not provided
  # then the ICESareas shape is loaded from the vmstools library)
  mergedTable2FishframeVE <- function(general = list(output.path = file.path("C:", "output"),
                                      a.year = 2009, a.country = "DNK", degree = 0.05), ...) {
    lstargs <- list(...)

    # compute per-ping totals over all species columns:
    # 'value' from the EURO columns, 'weight' from the KG columns
    for (what in c('value', 'weight')) {
      load(file.path(general$output.path, paste("all_merged_", what, "_", general$a.year, ".RData", sep = '')))
      nm <- colnames(all.merged)
      if (what == 'value')  idx.col <- grep('EURO', nm)  # index columns with species
      if (what == 'weight') idx.col <- grep('KG', nm)    # index columns with species
      assign(what, apply(all.merged[, idx.col], 1, sum, na.rm = TRUE))
    }
    all.merged$totvalue  <- value
    all.merged$totweight <- weight

    # debug: avoid an empty metier level
    all.merged[all.merged$LE_MET_level6 == "", "LE_MET_level6"] <- "NA"

    an <- function(x) as.numeric(as.character(x))
    all.merged$SI_LONG <- an(all.merged$SI_LONG)
    all.merged$SI_LATI <- an(all.merged$SI_LATI)
    all.merged <- all.merged[, !colnames(all.merged) %in% c('fuelcons', 'flag')]
    all.merged <- all.merged[all.merged$SI_STATE == 1, ]  # keep only fishing pings
    nm <- colnames(all.merged)
    if (length(lstargs$spatialPolygons) == 0) {
      data(ICESareas)
      all.merged$ICES_area <- ICESarea(all.merged[, c('SI_LONG', 'SI_LATI')], areas = ICESareas)
    } else {
      all.merged$ICES_area <- ICESarea(all.merged[, c('SI_LONG', 'SI_LATI')], areas = lstargs$spatialPolygons)
    }
    all.merged$c_square <- CSquare(an(all.merged$SI_LONG), an(all.merged$SI_LATI), degrees = general$degree)
    all.merged$month <- factor(format(as.POSIXct(all.merged$SI_DATE), "%m"))  # add month
    # NOTE(review): divides by 24 although the original comment said
    # "convert in hours"; this looks like an hours -> days conversion.
    # Confirm the unit of LE_EFF_VMS before changing anything here.
    all.merged$LE_EFF_VMS <- an(all.merged$LE_EFF_VMS) / 24
    all.merged <- all.merged[, c("LE_EFF_VMS", "KW_HOURS", "totvalue", "totweight", "LE_MET_level6", "ICES_area", "c_square", "month")]
    all.merged$c_square  <- factor(all.merged$c_square)
    all.merged$ICES_area <- factor(all.merged$ICES_area)


    # base::aggregate() replaced by fast grouping using the data.table library
    library(data.table)
    DT <- data.table(all.merged)
    qu <- quote(list(sum(an(LE_EFF_VMS)), sum(an(KW_HOURS)), sum(an(totvalue)), sum(an(totweight))))
    ff.ve <- DT[, eval(qu), by = list(c_square, ICES_area, month, LE_MET_level6)]
    colnames(ff.ve) <- c('c_square', 'ICES_area', 'month', 'LE_MET_level6', 'hours', 'kw_hours', 'totvalue', 'totweight')

    # additional fishframe fields
    ff.ve$year        <- general$a.year
    ff.ve$country     <- general$a.country
    ff.ve$nationalFAC <- " "
    ff.ve$recordtype  <- "VE"
    ff.ve$quarter     <- ff.ve$month  # init
    levels(ff.ve$quarter) <- c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)  # month -> quarter

    # order columns
    ff.ve <- as.data.frame(ff.ve)[, c('recordtype', 'country', 'year', 'quarter', 'month',
                                      'ICES_area', 'c_square', 'nationalFAC', 'LE_MET_level6',
                                      'hours', 'kw_hours', 'totweight', 'totvalue')]
    # save
    write.table(ff.ve, file = file.path(general$output.path,
                paste("ff_ve_", general$a.year, ".txt", sep = '')), dec = ".", sep = ";", quote = FALSE, row.names = FALSE)

    return(ff.ve)
  }

  # TO FISHFRAME (author: F. Bastardie)
  # create the VSL (landings) table to upload in fishframe
  # require: the 'data.table' and 'doBy' packages
  # optional: a 'spatialPolygons' argument for ICESarea() (if not provided
  # then the ICESareas shape is loaded from the vmstools library)
  mergedTable2FishframeVSL <- function(general = list(output.path = file.path("C:", "output"),
                                       a.year = 2009, a.country = "DNK", degree = 0.05), ...) {
    lstargs <- list(...)

    an <- function(x) as.numeric(as.character(x))

    # reshape in 'long' format
    # 1. load the 'weight' table
    what <- "weight"
    load(file.path(general$output.path, paste("all_merged_", what, "_", general$a.year, ".RData", sep = '')))
    all.merged[all.merged$LE_MET_level6 == "", "LE_MET_level6"] <- "NA"  # debug
    nm <- colnames(all.merged)
    idx.col <- grep('KG', nm)  # index columns with species
    all.merged$SI_LONG <- an(all.merged$SI_LONG)
    all.merged$SI_LATI <- an(all.merged$SI_LATI)
    all.merged <- all.merged[all.merged$SI_STATE == 1, ]  # keep only fishing pings
    if (length(lstargs$spatialPolygons) == 0) {
      # fixed: data(ICESareas) used to sit in the wrong branch (it was loaded
      # where lstargs$spatialPolygons is used and missing where ICESareas is
      # used); now consistent with mergedTable2FishframeVE above
      data(ICESareas)
      all.merged$ICES_area <- ICESarea(all.merged[, c('SI_LONG', 'SI_LATI')], areas = ICESareas)
    } else {
      all.merged$ICES_area <- ICESarea(all.merged[, c('SI_LONG', 'SI_LATI')], areas = lstargs$spatialPolygons)
    }
    all.merged$c_square <- factor(CSquare(an(all.merged$SI_LONG), an(all.merged$SI_LATI), degrees = general$degree))
    all.merged$month <- factor(format(as.POSIXct(all.merged$SI_DATE), "%m"))  # add month
    nm1 <- colnames(all.merged)
    idx.c <- which(nm1 %in% c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month"))
    xx1 <- all.merged[, c(idx.c, idx.col)]
    colnames(xx1) <- c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month", paste("sp", 1:length(idx.col), sep = ''))

    # ...then the 'value' table, processed the same way
    what <- "value"
    load(file.path(general$output.path, paste("all_merged_", what, "_", general$a.year, ".RData", sep = '')))
    all.merged[all.merged$LE_MET_level6 == "", "LE_MET_level6"] <- "NA"  # debug
    nm <- colnames(all.merged)
    idx.col <- grep('EURO', nm)  # index columns with species
    all.merged$SI_LONG <- an(all.merged$SI_LONG)
    all.merged$SI_LATI <- an(all.merged$SI_LATI)
    all.merged <- all.merged[all.merged$SI_STATE == 1, ]  # keep only fishing pings
    if (length(lstargs$spatialPolygons) == 0) {
      data(ICESareas)
      all.merged$ICES_area <- ICESarea(all.merged[, c('SI_LONG', 'SI_LATI')], areas = ICESareas)
    } else {
      all.merged$ICES_area <- ICESarea(all.merged[, c('SI_LONG', 'SI_LATI')], areas = lstargs$spatialPolygons)
    }
    all.merged$c_square <- factor(CSquare(an(all.merged$SI_LONG), an(all.merged$SI_LATI), degrees = general$degree))
    all.merged$month <- factor(format(as.POSIXct(all.merged$SI_DATE), "%m"))  # add month
    nm2 <- colnames(all.merged)
    idx.c <- which(nm2 %in% c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month"))
    xx2 <- all.merged[, c(idx.c, idx.col)]
    colnames(xx2) <- c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month", paste("sp", 1:length(idx.col), sep = ''))

    # 2. order before splitting in sub-blocks because aggregate() afterwards
    library(doBy)
    xx1 <- orderBy(~c_square + LE_MET_level6 + month, data = xx1)
    xx2 <- orderBy(~c_square + LE_MET_level6 + month, data = xx2)

    # 3. reshape => 'wide' to 'long' format
    # (tricky: done sub-block by sub-block because of potential 'out of memory')
    res <- NULL
    lev <- as.character(levels(xx1$c_square))  # do not split randomly but consistently with levels
    chunk <- c(seq(1, length(lev), by = 2000), length(lev))  # 2000 by 2000 levels...
    for (i in 1:(length(chunk) - 1)) {
      rm(vsl.ff1, vsl.ff2, vsl.ff); gc(reset = TRUE)
      cat(paste("level c_square", chunk[i], "to", chunk[i + 1], "\n"))
      vsl.ff1 <- reshape(xx1[xx1$c_square %in% lev[chunk[i]:chunk[i + 1]], ],
                         direction = "long", varying = 7:(6 + length(idx.col)), sep = "")  # 'long' format
      colnames(vsl.ff1) <- c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month", "species", "weight", "id")
      vsl.ff1$species <- factor(vsl.ff1$species)
      # extract the FAO species code from column names of the form XX_YY_SPP
      get.sp <- function(nm) unlist(lapply(strsplit(nm, split = "_"), function(x) x[3]))
      levels(vsl.ff1$species) <- get.sp(nm1[idx.col])

      vsl.ff2 <- reshape(xx2[xx2$c_square %in% lev[chunk[i]:chunk[i + 1]], ],
                         direction = "long", varying = 7:(6 + length(idx.col)), sep = "")  # 'long' format
      colnames(vsl.ff2) <- c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month", "species", "value", "id")
      vsl.ff2$species <- factor(vsl.ff2$species)
      nm <- colnames(xx2)
      get.sp <- function(nm) unlist(lapply(strsplit(nm, split = "_"), function(x) x[3]))
      levels(vsl.ff2$species) <- get.sp(nm2[idx.col])

      # 4. cbind weight and value side by side
      vsl.ff <- cbind.data.frame(vsl.ff1, vsl.ff2$value)

      # 5. clean up: drop empty-weight records and the reshape 'id' column
      vsl.ff <- vsl.ff[!is.na(vsl.ff$weight) & vsl.ff$weight != 0, ]
      vsl.ff <- vsl.ff[, !colnames(vsl.ff) %in% c('id')]
      colnames(vsl.ff) <- c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month", "species", "weight", "value")

      # 6. aggregate with fast grouping (caution: > R.2.11.0)
      library(data.table)
      vsl.ff$ICES_area <- factor(vsl.ff$ICES_area)
      DT <- data.table(vsl.ff)
      qu <- quote(list(sum(an(weight)), sum(an(value))))
      vsl.ff <- DT[, eval(qu), by = list(species, ICES_area, c_square, month, LE_MET_level6)]
      colnames(vsl.ff) <- c('species', 'ICES_area', 'c_square', 'month', 'LE_MET_level6', 'weight', 'value')

      # 7. bind all chunks
      res <- rbind.data.frame(res, vsl.ff)
    }

    # 8. additional fishframe fields
    res$year        <- general$a.year
    res$country     <- general$a.country
    res$nationalFAC <- " "
    res$recordtype  <- "VSL"
    res$quarter     <- res$month  # init
    levels(res$quarter) <- c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)  # month -> quarter

    # 9. convert species fao code to fishframe latin species names
    data(speciesLatinNames)
    res$species <- speciesLatinNames$ff_species_latin[match(as.character(res$species),
                     as.character(speciesLatinNames$fao_code))]


    # 10. order columns
    ff.vsl <- res
    ff.vsl <- as.data.frame(ff.vsl)[, c('recordtype', 'country', 'year', 'quarter', 'month',
                                        'ICES_area', 'c_square', 'nationalFAC', 'LE_MET_level6',
                                        'species', 'weight', 'value')]
    # 11. save
    write.table(ff.vsl, file = file.path(general$output.path,
                paste("ff_vsl_", general$a.year, ".txt", sep = '')), dec = ".", sep = ";", quote = FALSE, row.names = FALSE)


    return(ff.vsl)
  }




  # GENERAL CALLS
  ve  <- mergedTable2FishframeVE(general = general)
  vsl <- mergedTable2FishframeVSL(general = general)


  # add a fake column to get the same ncol()
  vsl <- cbind(vsl, 0)
  colnames(ve)  <- paste('col', 1:ncol(ve), sep = '')
  colnames(vsl) <- paste('col', 1:ncol(vsl), sep = '')

  # bind and order
  # (to get a VE line and then VSL lines, VE and then VSL lines, etc.)
  ff <- rbind(ve, vsl)
  library(doBy)
  ff <- orderBy(~col7 + col9 + col5 + col6 + col1, data = ff)

  # round the numbers (effort/weight/value columns)
  ff[, c(11, 12, 13)] <- ceiling(ff[, c(11, 12, 13)])

  # remove record with 0 in value because both
  # weight and value are mandatory for uploading in fishframe
  # but this should not have any effect here because
  # filling the gap should have been performed before on eflalo...
  ff <- ff[ff[, c(12)] != 0, ]

  # save
  write.table(ff, file = file.path(general$output.path,
              paste(general$a.country, "_", general$a.year, "VD.csv", sep = '')), dec = ".", sep = ";",
              quote = FALSE, row.names = FALSE, col.names = FALSE)


  return(ff)
}


# example calls
# vsl <- mergedTable2FishframeVSL (general=list(output.path=file.path("C:","merging", "EflaloAndTacsat"),
#                                  a.year=2009, a.country="DNK", degree=0.05) )
# ve <- mergedTable2FishframeVE (general=list(output.path=file.path("C:","merging", "EflaloAndTacsat"),
#                                a.year=2009, a.country="DNK", degree=0.05) )

# alternatively:
# for (a_year in as.character(2005:2010))
#   ff <- pings2Fishframe (general=list(output.path=file.path("C:","merging", "EflaloAndTacsat"),
#                          a.year=a_year, a.country="DNK", degree=0.01) )
pings2LandingsMaps <- - function (all.merged, sp="LE_KG_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), - cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, - breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000)){ - - if(!"quarter" %in% colnames(all.merged)) - all.merged$quarter <- factor(substr(quarters(as.POSIXct(all.merged$SI_DATE)),2,2)) - - # create the general folder - dir.create(file.path(output), recursive = TRUE ) - dir.create(file.path(output, "jpegLandings")) - - # detect the year - a.year <- format(strptime( paste(all.merged$SI_DATE[1]) , tz='GMT', "%e/%m/%Y" ), "%Y") - - if (any(grep("EURO", sp)>0)) what <- "value" - if (any(grep("KG", sp)>0)) what <- "weight" - if(what=="weight") a.unit <- "(KG)" - if(what=="value") a.unit <- "(EURO)" - - get.sp <- function (nm) unlist(lapply(strsplit(nm, split="_"), function(x) x[3])) - a.sp <- get.sp(sp) - - df1 <- all.merged[all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] - df1$SI_LATI <- anf(df1$SI_LATI ) # debug... - df1$SI_LONG <- anf(df1$SI_LONG ) # debug... 
- df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) - vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what, a.unit,sep=' '), - colLand="darkolivegreen4", addICESgrid=TRUE, - nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, - breaks0=breaks0, legendncol=2) - title(a.sp) - # create folders and save - dir.create(file.path(output, "jpegLandings", a.sp)) - dir.create(file.path(output, "jpegLandings", a.sp, "overall")) - dir.create(file.path(output, "jpegLandings", a.sp, "overall",what)) - savePlot(filename = file.path(output, "jpegLandings", a.sp, "overall", what, - paste("map_landings_",what,"_merged_vessels_",a.sp,"_",a.year,".jpeg",sep="")),type ="jpeg") - - dev.off() - - - # per quarter - for (a.quarter in levels(all.merged$quarter) ){ - df1 <- all.merged[all.merged$quarter==a.quarter & - all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] - df1$SI_LATI <- anf(df1$SI_LATI) # debug... - df1$SI_LONG <- anf(df1$SI_LONG) # debug... 
- df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) - if(nrow(df1)!=0){ - vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what,a.unit,sep=' '), - colLand="darkolivegreen4", addICESgrid=TRUE, - nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, - breaks0=breaks0,legendncol=2) - title(paste( a.sp, "-", a.year, "-", a.quarter) ) - # create folder and save - dir.create(file.path(output, "jpegLandings", a.sp, "overall", what, "quarter")) - savePlot(filename = file.path(output, "jpegLandings", a.sp, "overall", what, "quarter", - paste("map_landings_",what,"_merged_vessels_",a.sp,"_overall_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") - dev.off() - } - } - - - # per metier - for (met in levels(all.merged$LE_MET_level6) ){ - df1 <- all.merged[all.merged$LE_MET_level6==met & - all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] - df1$SI_LATI <- anf(df1$SI_LATI) # debug... - df1$SI_LONG <- anf(df1$SI_LONG) # debug... 
- df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) - if(nrow(df1)!=0){ - vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what,a.unit,sep=' '), - colLand="darkolivegreen4", addICESgrid=TRUE, - nameLon="SI_LONG", nameLat="SI_LATI",cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, - breaks0=breaks0,legendncol=2) - title(paste(met, "-", a.sp) ) - a.met <- gsub(">=", replacement="o",met) # debug - a.met <- gsub("<", replacement="",a.met) # debug - a.met <- gsub(">", replacement="o",a.met) # debug - # create folders and save - dir.create(file.path(output, "jpegLandings", a.sp, a.met)) - dir.create(file.path(output, "jpegLandings", a.sp, a.met, what)) - savePlot(filename = file.path(output, "jpegLandings", a.sp, a.met, what, - paste("map_landings_",what,"_merged_vessels_",a.sp,"_", a.met,"_",a.year,".jpeg",sep="")),type ="jpeg") - dev.off() - } - } - - # per metier, quarter - for (met in levels(all.merged$LE_MET_level6) ){ - for (a.quarter in levels(all.merged$quarter) ){ - - df1 <- all.merged[all.merged$LE_MET_level6==met & - all.merged$quarter==a.quarter & - all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] - df1$SI_LATI <- anf(df1$SI_LATI) # debug... - df1$SI_LONG <- anf(df1$SI_LONG) # debug... 
- df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) - if(nrow(df1)!=0){ - vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what,a.unit,sep=' '), - colLand="darkolivegreen4", addICESgrid=TRUE, - nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, - breaks0=breaks0,legendncol=2) - title(paste(met, "-", a.sp, "-", a.year, "-", a.quarter) ) - a.met <- gsub(">=", replacement="o",met) # debug - a.met <- gsub("<", replacement="",a.met) # debug - a.met <- gsub(">", replacement="o",a.met)# debug - dir.create(file.path(output, "jpegLandings", a.sp, a.met, what, "quarter")) - # save - savePlot(filename = file.path(output, "jpegLandings", a.sp, a.met, what, "quarter", - paste("map_landings_",what,"_merged_vessels_",a.sp,"_", a.met,"_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") - dev.off() - } - }} - - return() - } - # in value - # pings2LandingsMaps (all.merged=all.merged, sp="LE_EURO_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), - # cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, # fehmarn Belt area - # breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000) - # ) + # Author: F.Bastardie + + +#' generate some landings maps from the merged output table +#' +#' generate landings maps from the merged table per year, per metier (DCF +#' Level6), per metier-quarter all together +#' +#' using some arguments informing mapGrid(). Performing automatic creation of a +#' hierarchy of output folders. 
+#' +#' @param all.merged the data.frame from merged VMS to logbooks +#' @param sp sp name (FAO code) +#' @param output a path for the output to be located +#' @param cellsizeX additional arguments for mapGrid +#' @param cellsizeY additional arguments for mapGrid +#' @param we additional arguments for mapGrid +#' @param ea additional arguments for mapGrid +#' @param no additional arguments for mapGrid +#' @param so additional arguments for mapGrid +#' @param breaks0 additional arguments for mapGrid +#' @author Francois Bastardie +#' @examples +#' +#' +#' \dontrun{ +#' +#' years= c('1800') +#' +#' for(a.year in years){ +#' +#' graphics.off() +#' +#' # ...and load the merged output table for all vessels +#' load(file.path("C:","output",paste("all_merged__",a.year,".RData",sep=''))) +#' +#' # generate the effort maps (jpeg files) and store in a hierarchy of folders +#' pings2EffortMaps (all.merged=all.merged, output= +#' file.path("C:","output"), +#' cellsizeX =0.1, cellsizeY =0.05, we = 3, ea = 6, so = 50, no = 54, +#' breaks0=c(0,25, 50,100,200,400,800,1600, 3200,6400,12800, 100000)) +#' +#' # generate the landings maps (jpeg) and store in a hierarchy of folders +#' pings2LandingsMaps (all.merged=all.merged, sp="LE_KG_COD", +#' output= file.path("C:","output"), +#' cellsizeX =0.1, cellsizeY =0.05, we = 3, ea = 6, so = 50, no = 54, +#' breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4), +#' 100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000)) +#' +#' } +#' +#' # create some animations from the sequential jpeg files... 
+#' # (need the 'animation' R package and ImageMagik installed) +#' landingsMaps2GIFanim(idir = file.path("C:","output"), spp=c("COD") ) +#' +#' +#' } +#' +#' @export pings2LandingsMaps + pings2LandingsMaps <- + function (all.merged, sp="LE_KG_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), + cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, + breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000)){ + + if(!"quarter" %in% colnames(all.merged)) + all.merged$quarter <- factor(substr(quarters(as.POSIXct(all.merged$SI_DATE)),2,2)) + + # create the general folder + dir.create(file.path(output), recursive = TRUE ) + dir.create(file.path(output, "jpegLandings")) + + # detect the year + a.year <- format(strptime( paste(all.merged$SI_DATE[1]) , tz='GMT', "%e/%m/%Y" ), "%Y") + + if (any(grep("EURO", sp)>0)) what <- "value" + if (any(grep("KG", sp)>0)) what <- "weight" + if(what=="weight") a.unit <- "(KG)" + if(what=="value") a.unit <- "(EURO)" + + get.sp <- function (nm) unlist(lapply(strsplit(nm, split="_"), function(x) x[3])) + a.sp <- get.sp(sp) + + df1 <- all.merged[all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] + df1$SI_LATI <- anf(df1$SI_LATI ) # debug... + df1$SI_LONG <- anf(df1$SI_LONG ) # debug... 
+ df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) + vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what, a.unit,sep=' '), + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0, legendncol=2) + title(a.sp) + # create folders and save + dir.create(file.path(output, "jpegLandings", a.sp)) + dir.create(file.path(output, "jpegLandings", a.sp, "overall")) + dir.create(file.path(output, "jpegLandings", a.sp, "overall",what)) + savePlot(filename = file.path(output, "jpegLandings", a.sp, "overall", what, + paste("map_landings_",what,"_merged_vessels_",a.sp,"_",a.year,".jpeg",sep="")),type ="jpeg") + + dev.off() + + + # per quarter + for (a.quarter in levels(all.merged$quarter) ){ + df1 <- all.merged[all.merged$quarter==a.quarter & + all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] + df1$SI_LATI <- anf(df1$SI_LATI) # debug... + df1$SI_LONG <- anf(df1$SI_LONG) # debug... 
+ df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) + if(nrow(df1)!=0){ + vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what,a.unit,sep=' '), + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0,legendncol=2) + title(paste( a.sp, "-", a.year, "-", a.quarter) ) + # create folder and save + dir.create(file.path(output, "jpegLandings", a.sp, "overall", what, "quarter")) + savePlot(filename = file.path(output, "jpegLandings", a.sp, "overall", what, "quarter", + paste("map_landings_",what,"_merged_vessels_",a.sp,"_overall_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") + dev.off() + } + } + + + # per metier + for (met in levels(all.merged$LE_MET_level6) ){ + df1 <- all.merged[all.merged$LE_MET_level6==met & + all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] + df1$SI_LATI <- anf(df1$SI_LATI) # debug... + df1$SI_LONG <- anf(df1$SI_LONG) # debug... 
+ df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) + if(nrow(df1)!=0){ + vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what,a.unit,sep=' '), + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI",cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0,legendncol=2) + title(paste(met, "-", a.sp) ) + a.met <- gsub(">=", replacement="o",met) # debug + a.met <- gsub("<", replacement="",a.met) # debug + a.met <- gsub(">", replacement="o",a.met) # debug + # create folders and save + dir.create(file.path(output, "jpegLandings", a.sp, a.met)) + dir.create(file.path(output, "jpegLandings", a.sp, a.met, what)) + savePlot(filename = file.path(output, "jpegLandings", a.sp, a.met, what, + paste("map_landings_",what,"_merged_vessels_",a.sp,"_", a.met,"_",a.year,".jpeg",sep="")),type ="jpeg") + dev.off() + } + } + + # per metier, quarter + for (met in levels(all.merged$LE_MET_level6) ){ + for (a.quarter in levels(all.merged$quarter) ){ + + df1 <- all.merged[all.merged$LE_MET_level6==met & + all.merged$quarter==a.quarter & + all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] + df1$SI_LATI <- anf(df1$SI_LATI) # debug... + df1$SI_LONG <- anf(df1$SI_LONG) # debug... 
+ df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) + if(nrow(df1)!=0){ + vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what,a.unit,sep=' '), + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0,legendncol=2) + title(paste(met, "-", a.sp, "-", a.year, "-", a.quarter) ) + a.met <- gsub(">=", replacement="o",met) # debug + a.met <- gsub("<", replacement="",a.met) # debug + a.met <- gsub(">", replacement="o",a.met)# debug + dir.create(file.path(output, "jpegLandings", a.sp, a.met, what, "quarter")) + # save + savePlot(filename = file.path(output, "jpegLandings", a.sp, a.met, what, "quarter", + paste("map_landings_",what,"_merged_vessels_",a.sp,"_", a.met,"_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") + dev.off() + } + }} + + return() + } + # in value + # pings2LandingsMaps (all.merged=all.merged, sp="LE_EURO_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), + # cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, # fehmarn Belt area + # breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000) + # ) diff --git a/vmstools/R/plotTools.r b/vmstools/R/plotTools.r index c0ea991..9995912 100644 --- a/vmstools/R/plotTools.r +++ b/vmstools/R/plotTools.r @@ -1,116 +1,179 @@ -plotTools <- function(x,level="ICESrectangle",xlim,ylim,zlim=NULL,log=FALSE,gridcell=c(0.1,0.05),color=NULL,control.tacsat=list(clm=NULL),control.eflalo=list(clm=NULL),returnRange=FALSE,las=1){ - require(maps) - require(mapdata) - if(is.null(color)==TRUE) color <- rev(heat.colors(9)) - - #TACSAT - if(all(c("SI_LATI","SI_LONG") %in% colnames(x))){ - #- Get left lower point from ICES rectangle in gps format - x$LE_RECT <- ICESrectangle(x) - - #- Limit the spatial boundaries of tacsat - idxx <- which(x$SI_LONG >= xlim[1] & x$SI_LONG 
<= xlim[2]) - idxy <- which(x$SI_LATI >= ylim[1] & x$SI_LATI <= ylim[2]) - x <- x[idxx[which(idxx %in% idxy)],] - x$SI_LONG <- af(ac(x$SI_LONG)) - x$SI_LATI <- af(ac(x$SI_LATI)) - if(is.null(control.tacsat$clm)==TRUE){ - control.tacsat$clm <- "idx" - x$idx <- 1:nrow(x) - } - - #--------------------------------------------------------------------------- - #- Sum by rectangle - #--------------------------------------------------------------------------- - if(level == "ICESrectangle"){ - x$SI_LONG <- af(ICESrectangle2LonLat(ac(x$LE_RECT))[,2]) - x$SI_LATI <- af(ICESrectangle2LonLat(ac(x$LE_RECT))[,1]) - - DT <- data.table(x) - eq1 <- c.listquote(paste("sum(",control.tacsat$clm,",na.rm=TRUE)",sep="")) - eq2 <- c.listquote(c("SI_LONG","SI_LATI")) - - byRect <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(byRect) <- c("SI_LONG","SI_LATI",control.tacsat$clm) - byRect <- byRect[which(is.na(byRect$SI_LONG)==FALSE & is.na(byRect$SI_LATI) == FALSE),] - if(length(control.tacsat$clm)>1) rangeRect <- range(apply(byRect[,control.tacsat$clm],1,sum,na.rm=TRUE)) - if(length(control.tacsat$clm)==1) rangeRect <- range(byRect[,control.tacsat$clm],na.rm=TRUE) - rangeRect <- c(0,rangeRect[2]) - } - #--------------------------------------------------------------------------- - #- Sum by grid cell - #--------------------------------------------------------------------------- - if(level == "gridcell"){ - grids <- createGrid(xlim,ylim,gridcell[1],gridcell[2],type="SpatialPixelsDataFrame") - coords <- SpatialPointsDataFrame(cbind(x=an(ac(x$SI_LONG)),y=an(ac(x$SI_LATI))),data=x) - coords@data$dens <- over(as(coords,"SpatialPoints"), as(grids,"SpatialPixels")) - - #- Sum by gridcell - DT <- data.table(data.frame(coords)) - DT$x <- af(ac(grids@coords[DT$dens,1])) - DT$y <- af(ac(grids@coords[DT$dens,2])) - - eq1 <- c.listquote(paste("sum(",control.tacsat$clm,",na.rm=TRUE)",sep="")) - eq2 <- c.listquote(c("x","y")) - - byRect <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(byRect) <- 
c("SI_LONG","SI_LATI",control.tacsat$clm) - byRect$SI_LONG <- signif(anf(byRect$SI_LONG)) - byRect$SI_LATI <- signif(anf(byRect$SI_LATI)) - if(length(control.tacsat$clm)>1) rangeRect <- range(apply(byRect[,control.tacsat$clm],1,sum,na.rm=TRUE)) - if(length(control.tacsat$clm)==1) rangeRect <- range(byRect[,control.tacsat$clm],na.rm=TRUE) - rangeRect <- c(0,rangeRect[2]) - } - ctrl <- control.tacsat - } - - - #EFLALO - if(all(c("VE_FLT","VE_KW") %in% colnames(x))){ - if(level != "ICESrectangle") stop("Data supplied is 'eflalo' which only has ICESrectangle") - x$SI_LONG <- ICESrectangle2LonLat(ac(x$LE_RECT))[,2] - x$SI_LATI <- ICESrectangle2LonLat(ac(x$LE_RECT))[,1] - - #- Limit the spatial boundaries of eflalo - idxx <- which(x$SI_LONG >= xlim[1] & x$SI_LONG <= xlim[2]) - idxy <- which(x$SI_LATI >= ylim[1] & x$SI_LATI <= ylim[2]) - x <- x[idxx[which(idxx %in% idxy)],] - - #- Sum by rectangle - if(is.null(control.eflalo$clm)==TRUE) control.eflalo$clm <- colnames(x[,kgeur(colnames(x))]) - DT <- data.table(x) - eq1 <- c.listquote(paste("sum(",control.eflalo$clm,",na.rm=TRUE)",sep="")) - eq2 <- c.listquote(c("SI_LONG","SI_LATI")) - DT$SI_LONG <- af(ac(DT$SI_LONG)); DT$SI_LATI <- af(ac(DT$SI_LATI)) - - byRect <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(byRect) <- c("SI_LONG","SI_LATI",control.eflalo$clm) - byRect <- byRect[which(is.na(byRect$SI_LONG)==FALSE & is.na(byRect$SI_LATI) == FALSE),] - if(length(control.eflalo$clm)>1) rangeRect <- range(apply(byRect[,control.eflalo$clm],1,sum,na.rm=TRUE)) - if(length(control.eflalo$clm)==1) rangeRect <- range(byRect[,control.eflalo$clm],na.rm=TRUE) - rangeRect <- c(0,rangeRect[2]) - ctrl <- control.eflalo - } - - map("worldHires",resolution=1,xlim=xlim,ylim=ylim,fill=TRUE,col="darkgreen");axis(1,las=las);axis(2,las=las);box() - for(iRect in 1:nrow(byRect)){ - if(log){ - if(is.null(zlim)==TRUE){ i <- round((log(sum(byRect[iRect,ctrl$clm],na.rm=TRUE))-ifelse(rangeRect[1]==0,0,log(rangeRect[1]))) - /(log(rangeRect[2]) - 
ifelse(rangeRect[1]==0,0,log(rangeRect[1])))*(length(color)-1)) +1 - } else { - i <- round((log(sum(byRect[iRect,ctrl$clm],na.rm=TRUE))-ifelse(zlim[1]==0,0,log(zlim[1]))) - /(log(zlim[2]) - ifelse(zlim[1]==0,0,log(zlim[1]))) *(length(color)-1)) +1 - } - } else { - if(is.null(zlim)==TRUE){ i <- round((sum(byRect[iRect,ctrl$clm],na.rm=TRUE)-ifelse(rangeRect[1]==0,0,rangeRect[1])) - /(rangeRect[2] - ifelse(rangeRect[1]==0,0,rangeRect[1])) *(length(color)-1)) +1 - } else { - i <- round((sum(byRect[iRect,ctrl$clm],na.rm=TRUE)-ifelse(zlim[1]==0,0,zlim[1])) - /(zlim[2] - ifelse(zlim[1]==0,0,zlim[1])) *(length(color)-1)) +1 - } - } - if(level == "ICESrectangle") polygon(x=c(an(ac(byRect[iRect,"SI_LONG"])),an(ac(byRect[iRect,"SI_LONG"]))+1,an(ac(byRect[iRect,"SI_LONG"]))+1,an(ac(byRect[iRect,"SI_LONG"]))), - y=c(rep(an(ac(byRect[iRect,"SI_LATI"])),2),rep(an(ac(byRect[iRect,"SI_LATI"]))+0.5,2)),col=color[i],lwd=1,border=NA) - if(level == "gridcell") polygon(x=c(an(ac(byRect[iRect,"SI_LONG"]))-gridcell[1]/2,an(ac(byRect[iRect,"SI_LONG"]))+gridcell[1]/2,an(ac(byRect[iRect,"SI_LONG"]))+gridcell[1]/2,an(ac(byRect[iRect,"SI_LONG"]))-gridcell[1]/2), - y=c(rep(an(ac(byRect[iRect,"SI_LATI"])),2)-gridcell[2]/2,rep(an(ac(byRect[iRect,"SI_LATI"]))+gridcell[2]/2,2)),col=color[i],lwd=1,border=NA) - } - map("worldHires",resolution=1,xlim=xlim,ylim=ylim,fill=TRUE,col="darkgreen",plt=FALSE,add=TRUE);box() - if(returnRange) return(rangeRect) -} \ No newline at end of file +#' Plot eflalo or tacsat files +#' +#' Plot eflalo or tacsat given a grid and column name as a map of intensity +#' +#' +#' @param x Eflalo or tacsat dataframe +#' @param level Aggregating level: "ICESrectangle" or specified "gridcell" +#' between xlim and ylim in steps gricell for tacsat or "ICESrectangle" only +#' for eflalo +#' @param xlim two element numeric vector giving a range of longitudes, +#' expressed in degrees, to which drawing should be restricted. 
Longitude is +#' measured in degrees east of Greenwich, so that, in particular, locations in +#' the USA have negative longitude. If fill = TRUE, polygons selected by region +#' must be entirely inside the xlim range. The default value of this argument +#' spans the entire longitude range of the database. +#' @param ylim two element numeric vector giving a range of latitudes, +#' expressed in degrees, to which drawing should be restricted. Latitude is +#' measured in degrees north of the equator, so that, in particular, locations +#' in the USA have positive latitude. If fill = TRUE, polygons selected by +#' region must be entirely inside the ylim range. The default value of this +#' argument spans the entire latitude range of the database. +#' @param zlim the minimum and maximum z values for which colors should be +#' plotted, defaulting to the range of the finite values of z. Each of the +#' given colors will be used to color an equispaced interval of this range. The +#' midpoints of the intervals cover the range, so that values just outside the +#' range will be plotted. +#' @param log whether values to plot need to be logged, TRUE or FALSE +#' @param gridcell two element numeric vector giving the steps in longitudinal +#' degrees and steps in latitudinal degrees for the grid to plot on. Only +#' needed when level = "gridcell" +#' @param color colors range. default = brewer.pal(9,"YlOrRd") +#' @param control.tacsat list with attribute "clm": column names to display +#' aggregated results over +#' @param control.eflalo list with attribute "clm": column names to display +#' aggregated results over +#' @param returnRange Logical: return range of plotted values (which can be +#' used to define own legend +#' @param las Direction of axis legends. See ?par for more info +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{plotTreeMap}}, \code{\link{Grid2KLM}}, +#' \code{\link{landingsMap2GIFanim}}, \code{\link{pings2EffortMaps}}, +#' \code{\link{pings2LandingsMaps}} +#' @examples +#' +#' data(tacsat) +#' data(eflalo) +#' +#' plotTools(tacsat,level="ICESrectangle",xlim=c(-5,10),ylim=c(48,62),zlim=NULL, +#' log=FALSE,gridcell=c(0.1,0.05),color=NULL,control.tacsat=list(clm=NULL)) +#' x11() +#' plotTools(eflalo,level="ICESrectangle",xlim=c(-5,10),ylim=c(48,62),zlim=NULL, +#' log=FALSE,gridcell=c(0.1,0.05),color=NULL,control.tacsat=list(clm=NULL)) +#' x11() +#' plotTools(tacsat,level="gridcell",xlim=c(-5,10),ylim=c(48,62),zlim=NULL, +#' log=FALSE,gridcell=c(0.1,0.05),color=NULL,control.tacsat=list(clm=NULL)) +#' +#' x11() +#' plotTools(eflalo,level="ICESrectangle",xlim=c(-5,10),ylim=c(48,62),zlim=NULL, +#' log=FALSE,gridcell=c(0.1,0.05),color=NULL,control.tacsat=list(clm=NULL), +#' control.eflalo=list(clm=c("LE_KG_COD","LE_KG_PLE"))) +#' +#' +#' @export plotTools +plotTools <- function(x,level="ICESrectangle",xlim,ylim,zlim=NULL,log=FALSE,gridcell=c(0.1,0.05),color=NULL,control.tacsat=list(clm=NULL),control.eflalo=list(clm=NULL),returnRange=FALSE,las=1){ + require(maps) + require(mapdata) + if(is.null(color)==TRUE) color <- rev(heat.colors(9)) + + #TACSAT + if(all(c("SI_LATI","SI_LONG") %in% colnames(x))){ + #- Get left lower point from ICES rectangle in gps format + x$LE_RECT <- ICESrectangle(x) + + #- Limit the spatial boundaries of tacsat + idxx <- which(x$SI_LONG >= xlim[1] & x$SI_LONG <= xlim[2]) + idxy <- which(x$SI_LATI >= ylim[1] & x$SI_LATI <= ylim[2]) + x <- x[idxx[which(idxx %in% idxy)],] + x$SI_LONG <- af(ac(x$SI_LONG)) + x$SI_LATI <- af(ac(x$SI_LATI)) + if(is.null(control.tacsat$clm)==TRUE){ + control.tacsat$clm <- "idx" + x$idx <- 1:nrow(x) + } + + #--------------------------------------------------------------------------- + #- Sum by rectangle + #--------------------------------------------------------------------------- + if(level == 
"ICESrectangle"){ + x$SI_LONG <- af(ICESrectangle2LonLat(ac(x$LE_RECT))[,2]) + x$SI_LATI <- af(ICESrectangle2LonLat(ac(x$LE_RECT))[,1]) + + DT <- data.table(x) + eq1 <- c.listquote(paste("sum(",control.tacsat$clm,",na.rm=TRUE)",sep="")) + eq2 <- c.listquote(c("SI_LONG","SI_LATI")) + + byRect <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(byRect) <- c("SI_LONG","SI_LATI",control.tacsat$clm) + byRect <- byRect[which(is.na(byRect$SI_LONG)==FALSE & is.na(byRect$SI_LATI) == FALSE),] + if(length(control.tacsat$clm)>1) rangeRect <- range(apply(byRect[,control.tacsat$clm],1,sum,na.rm=TRUE)) + if(length(control.tacsat$clm)==1) rangeRect <- range(byRect[,control.tacsat$clm],na.rm=TRUE) + rangeRect <- c(0,rangeRect[2]) + } + #--------------------------------------------------------------------------- + #- Sum by grid cell + #--------------------------------------------------------------------------- + if(level == "gridcell"){ + grids <- createGrid(xlim,ylim,gridcell[1],gridcell[2],type="SpatialPixelsDataFrame") + coords <- SpatialPointsDataFrame(cbind(x=an(ac(x$SI_LONG)),y=an(ac(x$SI_LATI))),data=x) + coords@data$dens <- over(as(coords,"SpatialPoints"), as(grids,"SpatialPixels")) + + #- Sum by gridcell + DT <- data.table(data.frame(coords)) + DT$x <- af(ac(grids@coords[DT$dens,1])) + DT$y <- af(ac(grids@coords[DT$dens,2])) + + eq1 <- c.listquote(paste("sum(",control.tacsat$clm,",na.rm=TRUE)",sep="")) + eq2 <- c.listquote(c("x","y")) + + byRect <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(byRect) <- c("SI_LONG","SI_LATI",control.tacsat$clm) + byRect$SI_LONG <- signif(anf(byRect$SI_LONG)) + byRect$SI_LATI <- signif(anf(byRect$SI_LATI)) + if(length(control.tacsat$clm)>1) rangeRect <- range(apply(byRect[,control.tacsat$clm],1,sum,na.rm=TRUE)) + if(length(control.tacsat$clm)==1) rangeRect <- range(byRect[,control.tacsat$clm],na.rm=TRUE) + rangeRect <- c(0,rangeRect[2]) + } + ctrl <- control.tacsat + } + + + #EFLALO + if(all(c("VE_FLT","VE_KW") %in% colnames(x))){ + 
if(level != "ICESrectangle") stop("Data supplied is 'eflalo' which only has ICESrectangle") + x$SI_LONG <- ICESrectangle2LonLat(ac(x$LE_RECT))[,2] + x$SI_LATI <- ICESrectangle2LonLat(ac(x$LE_RECT))[,1] + + #- Limit the spatial boundaries of eflalo + idxx <- which(x$SI_LONG >= xlim[1] & x$SI_LONG <= xlim[2]) + idxy <- which(x$SI_LATI >= ylim[1] & x$SI_LATI <= ylim[2]) + x <- x[idxx[which(idxx %in% idxy)],] + + #- Sum by rectangle + if(is.null(control.eflalo$clm)==TRUE) control.eflalo$clm <- colnames(x[,kgeur(colnames(x))]) + DT <- data.table(x) + eq1 <- c.listquote(paste("sum(",control.eflalo$clm,",na.rm=TRUE)",sep="")) + eq2 <- c.listquote(c("SI_LONG","SI_LATI")) + DT$SI_LONG <- af(ac(DT$SI_LONG)); DT$SI_LATI <- af(ac(DT$SI_LATI)) + + byRect <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(byRect) <- c("SI_LONG","SI_LATI",control.eflalo$clm) + byRect <- byRect[which(is.na(byRect$SI_LONG)==FALSE & is.na(byRect$SI_LATI) == FALSE),] + if(length(control.eflalo$clm)>1) rangeRect <- range(apply(byRect[,control.eflalo$clm],1,sum,na.rm=TRUE)) + if(length(control.eflalo$clm)==1) rangeRect <- range(byRect[,control.eflalo$clm],na.rm=TRUE) + rangeRect <- c(0,rangeRect[2]) + ctrl <- control.eflalo + } + + map("worldHires",resolution=1,xlim=xlim,ylim=ylim,fill=TRUE,col="darkgreen");axis(1,las=las);axis(2,las=las);box() + for(iRect in 1:nrow(byRect)){ + if(log){ + if(is.null(zlim)==TRUE){ i <- round((log(sum(byRect[iRect,ctrl$clm],na.rm=TRUE))-ifelse(rangeRect[1]==0,0,log(rangeRect[1]))) + /(log(rangeRect[2]) - ifelse(rangeRect[1]==0,0,log(rangeRect[1])))*(length(color)-1)) +1 + } else { + i <- round((log(sum(byRect[iRect,ctrl$clm],na.rm=TRUE))-ifelse(zlim[1]==0,0,log(zlim[1]))) + /(log(zlim[2]) - ifelse(zlim[1]==0,0,log(zlim[1]))) *(length(color)-1)) +1 + } + } else { + if(is.null(zlim)==TRUE){ i <- round((sum(byRect[iRect,ctrl$clm],na.rm=TRUE)-ifelse(rangeRect[1]==0,0,rangeRect[1])) + /(rangeRect[2] - ifelse(rangeRect[1]==0,0,rangeRect[1])) *(length(color)-1)) +1 + } else { 
+ i <- round((sum(byRect[iRect,ctrl$clm],na.rm=TRUE)-ifelse(zlim[1]==0,0,zlim[1])) + /(zlim[2] - ifelse(zlim[1]==0,0,zlim[1])) *(length(color)-1)) +1 + } + } + if(level == "ICESrectangle") polygon(x=c(an(ac(byRect[iRect,"SI_LONG"])),an(ac(byRect[iRect,"SI_LONG"]))+1,an(ac(byRect[iRect,"SI_LONG"]))+1,an(ac(byRect[iRect,"SI_LONG"]))), + y=c(rep(an(ac(byRect[iRect,"SI_LATI"])),2),rep(an(ac(byRect[iRect,"SI_LATI"]))+0.5,2)),col=color[i],lwd=1,border=NA) + if(level == "gridcell") polygon(x=c(an(ac(byRect[iRect,"SI_LONG"]))-gridcell[1]/2,an(ac(byRect[iRect,"SI_LONG"]))+gridcell[1]/2,an(ac(byRect[iRect,"SI_LONG"]))+gridcell[1]/2,an(ac(byRect[iRect,"SI_LONG"]))-gridcell[1]/2), + y=c(rep(an(ac(byRect[iRect,"SI_LATI"])),2)-gridcell[2]/2,rep(an(ac(byRect[iRect,"SI_LATI"]))+gridcell[2]/2,2)),col=color[i],lwd=1,border=NA) + } + map("worldHires",resolution=1,xlim=xlim,ylim=ylim,fill=TRUE,col="darkgreen",plt=FALSE,add=TRUE);box() + if(returnRange) return(rangeRect) +} diff --git a/vmstools/R/plotTreeMap.r b/vmstools/R/plotTreeMap.r index cb27d31..192b7c9 100644 --- a/vmstools/R/plotTreeMap.r +++ b/vmstools/R/plotTreeMap.r @@ -1,141 +1,193 @@ - -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -# F.Bastardie -# using stolen code on the net -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - -plotTreeMap <- -function (x, gridcell = c(0.1, 0.1), gear = "OTB", xlim = c(-1, - 17), ylim = c(52, 62), acolors = rainbow(7), species.to.keep = c("LE_KG_COD", - "LE_KG_NEP", "LE_KG_PLE", "LE_KG_SOL")) -{ - chop <- function(x) rev(rev(x)[-1]) - simple.hook <- function(z, xl, yl, xu, yu) { - rect(xl, yl, xu, yu, col = as.character(z$one), border = NA) - } - squarified.treemap <- function(z, x = 0, y = 0, w = 1, h = 1, - hook) { - cz <- cumsum(z$size)/sum(z$size) - n <- which.min(abs(log(max(w/h, h/w) * sum(z$size) * - cz^2/z$size))) - more <- n < length(z$size) - a <- c(0, cz[1:n])/cz[n] - if (h > w) { - hook(z[1:n, ], x + w * chop(a), rep(y, n), x + w * - a[-1], rep(y + h * cz[n], n)) - if (more) - Recall(z[-(1:n), ], x, y + 
h * cz[n], w, h * - (1 - cz[n]), hook) - } - else { - hook(z[1:n, ], rep(x, n), y + h * chop(a), rep(x + - w * cz[n], n), y + h * a[-1]) - if (more) - Recall(z[-(1:n), ], x + w * cz[n], y, w * (1 - - cz[n]), h, hook) - } - } - x <- x[x$SI_STATE == 1, ] - x$SI_LONG <- anf(x$SI_LONG) - x$SI_LATI <- anf(x$SI_LATI) - x <- x[!is.na(x$SI_LATI), ] - idxx <- which(x$SI_LONG >= xlim[1] & x$SI_LONG <= xlim[2]) - idxy <- which(x$SI_LATI >= ylim[1] & x$SI_LATI <= ylim[2]) - x <- x[idxx[which(idxx %in% idxy)], ] - xx <- x[x$LE_GEAR %in% gear, ] - grids <- createGrid(xrange = xlim, yrange = ylim, - gridcell[1], gridcell[2], type = "SpatialPixelsDataFrame") - coords <- SpatialPointsDataFrame(cbind(x = an(ac(xx$SI_LONG)), - y = an(ac(xx$SI_LATI))), data = xx) - coords@data$dens <- over(as(coords,"SpatialPoints"), as(grids,"SpatialPixels")) - DT <- data.table(data.frame(coords)) - DT$x <- af(ac(grids@coords[DT$dens, 1])) - DT$y <- af(ac(grids@coords[DT$dens, 2])) - idx.col <- grep("KG", names(coords)) - eq1 <- c.listquote(paste("sum(", names(coords[, idx.col]), - ",na.rm=TRUE)", sep = "")) - eq2 <- c.listquote(c("x", "y")) - byRect <- data.frame(DT[, eval(eq1), by = eval(eq2)]) - colnames(byRect) <- c("SI_LONG", "SI_LATI", names(coords)[idx.col]) - byRect$SI_LONG <- signif(anf(byRect$SI_LONG)) - byRect$SI_LATI <- signif(anf(byRect$SI_LATI)) - idx.col <- grep("KG", colnames(byRect)) - rangeRect <- range(apply(byRect[idx.col], 1, sum, na.rm = TRUE)) - rangeRect <- c(0, rangeRect[2]) - A.sum <- apply(byRect[, idx.col], 1, sum, na.rm = TRUE) - A.sum2 <- apply(byRect[, idx.col], 2, sum, na.rm = TRUE) - species.to.merge <- names(A.sum2)[!names(A.sum2) %in% species.to.keep] - byRect$LE_KG_OTH <- apply(byRect[, species.to.merge], 1, - sum, na.rm = TRUE) - byRect <- byRect[, !names(byRect) %in% species.to.merge] - idx.col <- grep("KG", names(byRect)) - byRect[, idx.col] <- sweep(byRect[, idx.col], 1, A.sum, FUN = "/") - X11(7, 7) - require(mapdata) - map("worldHires", resolution = 1, xlim = 
xlim, ylim = ylim, - fill = TRUE, col = "darkgreen") - map.axes() - box() - for (iRect in 1:nrow(byRect)) { - x1 <- an(ac(byRect[iRect, "SI_LONG"])) - y1 <- an(ac(byRect[iRect, "SI_LATI"])) - size <- an(ac(byRect[iRect, idx.col])) - size <- replace(size, is.na(size) | size <= 0, 1e-04) ## DEBUG: NO NEGATIVE size ALLOWED - z <- data.frame(size = size, one = acolors[1:((1 + length(idx.col)) - - 1)]) - z <- z[order(-z$size), ] - print(z) - squarified.treemap(z, x = x1, y = y1, w = gridcell[1], - h = gridcell[2], hook = simple.hook) - } - for (i in seq(xlim[1], xlim[2], by = gridcell[1])) abline(v = i, - col = grey(0.9)) - for (i in seq(ylim[1], ylim[2], by = gridcell[2])) abline(h = i, - col = grey(0.9)) - map("worldHires", add = TRUE, resolution = 1, xlim = xlim, - ylim = ylim, fill = TRUE, col = "darkgreen") - map.axes() - box() - legend("topright", legend = gsub('LE_KG_','', names(byRect[, idx.col])), fill = acolors[1:((1 + - length(idx.col)) - 1)], bg = "white") - - - - return() -} - - -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - - - ## do mergeEflalo2Pings().... 
##!!!!!!!!!!!!!!!!!!!!!!!!!!!!##
# F.Bastardie
# using stolen code on the net
##!!!!!!!!!!!!!!!!!!!!!!!!!!!!##


#' Plot a squarified treemap of landing proportion per cell.
#'
#' Plot a squarified treemap of landing proportion per cell from the merged
#' VMS/logbooks tables.
#'
#' to be further customized...
#'
#' @param x a merged data.frame such as the one produced by mergeEflalo2Pings()
#' @param gridcell grid cell resolution in degree, c(lon step, lat step)
#' @param gear subset the data according to a (set of) gear(s)
#' @param xlim longitude range
#' @param ylim latitude range
#' @param acolors should be at least same of length of the species.to.keep
#'   plus one (an extra colour is used for the pooled 'LE_KG_OTH' category)
#' @param species.to.keep set of eflalo names for species (the same for
#' plotting cash value not implemented yet.)
#' @return called for its plotting side effect; returns NULL
#' @author Francois Bastardie
#' @references EU Lot 2 project
#' @examples
#'
#' \dontrun{
#' # do mergeEflalo2Pings()....
#' # ...and load the merged output table for all vessels
#' load(file.path("C:","output","all_merged__1800.RData"))
#'
#' graphics.off()
#' df1 <- all.merged[, c("LE_MET_level6", "LE_GEAR","SI_STATE", "SI_LATI",
#'                       "SI_LONG","LE_KG_COD","LE_KG_PLE")]
#' df1$SI_LONG <- anf(df1$SI_LONG)
#' df1$SI_LATI <- anf(df1$SI_LATI)
#' df1 <- df1[ !is.na(df1$SI_LATI),]
#' df1 <- df1[ !is.na(df1$SI_LONG),]
#' df1$LE_MET <- df1$LE_MET_level6
#'
#' # call to plotTreeMap()
#' a.metier <- "TBB_DEF_70-99_0_0"
#' a.gear <-"TBB"
#' plotTreeMap (df1[df1$LE_MET_level6 %in% a.metier & df1$SI_STATE==1,],
#'    gridcell=c(0.1,0.05), gear=a.gear,
#'    xlim= c(3,6), ylim= c(50,54), acolors=rainbow(7),
#'    species.to.keep= c("LE_KG_COD","LE_KG_PLE") )
#' mtext("Latitude", 2, 3) ; mtext("Longitude",1, 2)
#' }
#'
#' @export plotTreeMap
plotTreeMap <-
function (x, gridcell = c(0.1, 0.1), gear = "OTB", xlim = c(-1,
    17), ylim = c(52, 62), acolors = rainbow(7), species.to.keep = c("LE_KG_COD",
    "LE_KG_NEP", "LE_KG_PLE", "LE_KG_SOL"))
{
    # Drop the last element of a vector (used to build rectangle corner coords).
    chop <- function(x) rev(rev(x)[-1])
    # Default drawing hook: fill each sub-rectangle with its species colour.
    simple.hook <- function(z, xl, yl, xu, yu) {
        rect(xl, yl, xu, yu, col = as.character(z$one), border = NA)
    }
    # Recursive squarified-treemap layout inside the rectangle (x, y, w, h).
    # z is a data.frame with columns 'size' and 'one' (colour), sorted by
    # decreasing size; 'hook' is called to draw each laid-out strip.
    squarified.treemap <- function(z, x = 0, y = 0, w = 1, h = 1,
        hook) {
        cz <- cumsum(z$size)/sum(z$size)
        # n = number of items placed in the current strip (best aspect ratio).
        n <- which.min(abs(log(max(w/h, h/w) * sum(z$size) *
            cz^2/z$size)))
        more <- n < length(z$size)
        a <- c(0, cz[1:n])/cz[n]
        if (h > w) {
            # Horizontal strip along the bottom; recurse into the remainder.
            hook(z[1:n, ], x + w * chop(a), rep(y, n), x + w *
                a[-1], rep(y + h * cz[n], n))
            if (more)
                Recall(z[-(1:n), ], x, y + h * cz[n], w, h *
                  (1 - cz[n]), hook)
        }
        else {
            # Vertical strip along the left side; recurse into the remainder.
            hook(z[1:n, ], rep(x, n), y + h * chop(a), rep(x +
                w * cz[n], n), y + h * a[-1])
            if (more)
                Recall(z[-(1:n), ], x + w * cz[n], y, w * (1 -
                  cz[n]), h, hook)
        }
    }
    # Keep fishing pings only (SI_STATE == 1) with valid coordinates
    # inside the requested lon/lat window.
    x <- x[x$SI_STATE == 1, ]
    x$SI_LONG <- anf(x$SI_LONG)
    x$SI_LATI <- anf(x$SI_LATI)
    x <- x[!is.na(x$SI_LATI), ]
    idxx <- which(x$SI_LONG >= xlim[1] & x$SI_LONG <= xlim[2])
    idxy <- which(x$SI_LATI >= ylim[1] & x$SI_LATI <= ylim[2])
    x <- x[idxx[which(idxx %in% idxy)], ]
    # Subset to the requested gear(s).
    xx <- x[x$LE_GEAR %in% gear, ]
    # Build the regular grid and assign each ping to a grid pixel;
    # 'dens' holds the index of the matching pixel in grids@coords.
    grids <- createGrid(xrange = xlim, yrange = ylim,
        gridcell[1], gridcell[2], type = "SpatialPixelsDataFrame")
    coords <- SpatialPointsDataFrame(cbind(x = an(ac(xx$SI_LONG)),
        y = an(ac(xx$SI_LATI))), data = xx)
    coords@data$dens <- over(as(coords,"SpatialPoints"), as(grids,"SpatialPixels"))
    DT <- data.table(data.frame(coords))
    DT$x <- af(ac(grids@coords[DT$dens, 1]))
    DT$y <- af(ac(grids@coords[DT$dens, 2]))
    # Sum each LE_KG_* column per grid cell via a data.table grouped query
    # built from quoted expressions (c.listquote).
    idx.col <- grep("KG", names(coords))
    eq1 <- c.listquote(paste("sum(", names(coords[, idx.col]),
        ",na.rm=TRUE)", sep = ""))
    eq2 <- c.listquote(c("x", "y"))
    byRect <- data.frame(DT[, eval(eq1), by = eval(eq2)])
    colnames(byRect) <- c("SI_LONG", "SI_LATI", names(coords)[idx.col])
    byRect$SI_LONG <- signif(anf(byRect$SI_LONG))
    byRect$SI_LATI <- signif(anf(byRect$SI_LATI))
    idx.col <- grep("KG", colnames(byRect))
    rangeRect <- range(apply(byRect[idx.col], 1, sum, na.rm = TRUE))
    rangeRect <- c(0, rangeRect[2])
    # Pool every species not in species.to.keep into a single LE_KG_OTH column,
    # then convert each cell's landings to proportions of the cell total.
    A.sum <- apply(byRect[, idx.col], 1, sum, na.rm = TRUE)
    A.sum2 <- apply(byRect[, idx.col], 2, sum, na.rm = TRUE)
    species.to.merge <- names(A.sum2)[!names(A.sum2) %in% species.to.keep]
    byRect$LE_KG_OTH <- apply(byRect[, species.to.merge], 1,
        sum, na.rm = TRUE)
    byRect <- byRect[, !names(byRect) %in% species.to.merge]
    idx.col <- grep("KG", names(byRect))
    byRect[, idx.col] <- sweep(byRect[, idx.col], 1, A.sum, FUN = "/")
    # NOTE(review): X11() is platform-dependent and require() inside a function
    # is discouraged — consider dev.new() and a package-level dependency.
    X11(7, 7)
    require(mapdata)
    map("worldHires", resolution = 1, xlim = xlim, ylim = ylim,
        fill = TRUE, col = "darkgreen")
    map.axes()
    box()
    # One treemap per grid cell, anchored at the cell's lower-left corner.
    for (iRect in 1:nrow(byRect)) {
        x1 <- an(ac(byRect[iRect, "SI_LONG"]))
        y1 <- an(ac(byRect[iRect, "SI_LATI"]))
        size <- an(ac(byRect[iRect, idx.col]))
        size <- replace(size, is.na(size) | size <= 0, 1e-04) ## DEBUG: NO NEGATIVE size ALLOWED
        z <- data.frame(size = size, one = acolors[1:((1 + length(idx.col)) -
            1)])
        z <- z[order(-z$size), ]
        print(z)
        squarified.treemap(z, x = x1, y = y1, w = gridcell[1],
            h = gridcell[2], hook = simple.hook)
    }
    # Overlay the grid lines and redraw the coastline on top.
    for (i in seq(xlim[1], xlim[2], by = gridcell[1])) abline(v = i,
        col = grey(0.9))
    for (i in seq(ylim[1], ylim[2], by = gridcell[2])) abline(h = i,
        col = grey(0.9))
    map("worldHires", add = TRUE, resolution = 1, xlim = xlim,
        ylim = ylim, fill = TRUE, col = "darkgreen")
    map.axes()
    box()
    legend("topright", legend = gsub('LE_KG_','', names(byRect[, idx.col])), fill = acolors[1:((1 +
        length(idx.col)) - 1)], bg = "white")
    return()
}
#' Plot the Minimum Convex Polygon
#'
#' Draws the MCP polygon covering the geographical extent of a point set on a
#' Cartesian plane. Relies on the \code{r.MCP} object created beforehand by
#' \code{calc_mcp()}.
#'
#' @param plotnew Boolean: TRUE opens a fresh plot, FALSE overlays the current one
#' @param plotpoints Boolean: TRUE also draws the underlying point observations
#' @param points.col Colour used for the point observations
#' @param points.pch Plotting symbol used for the point observations
#' @param titletxt String placed as the plot title
#' @param xaxis Label for the x-axis
#' @param yaxis Label for the y-axis
#' @param mcp.col Line colour of the MCP outline
#' @param mcp.lwd Line width of the MCP outline
#' @param fill.col Fill colour of the MCP polygon
#' @param jpeg Boolean: TRUE writes the plot to a JPEG file named after r.MCP$id
#' @param \dots Further graphical parameters, forwarded to \code{par()}
#' @author Randy Bui, Ron N. Buliung, Tarmo K. Remmel
#' @examples
#'
#' data(tacsat)
#'
#' calc_mcp(id=1, points = tacsat[1:10,c("SI_LONG","SI_LATI")], filename="MCP_Output.txt",
#' verbose = FALSE, pct = 100)
#' plot_mcp(plotnew=TRUE, plotpoints=TRUE, titletxt="Title",
#' xaxis= "Easting (m)", yaxis="Northing (m)")
#'
#' @export plot_mcp
plot_mcp <- function (plotnew = TRUE, plotpoints = TRUE, points.col = "black",
    points.pch = 1, titletxt = "Title", xaxis = "Easting (m)",
    yaxis = "Northing (m)", mcp.col = "black", mcp.lwd = 2, fill.col = NA,
    jpeg = FALSE, ...)
{
    # Apply any extra graphical parameters before drawing anything.
    par(...)
    # Optionally redirect output to a JPEG device (closed again at the end).
    if (jpeg) {
        jpeg(filename = paste("MCP", r.MCP$id, ".jpg", sep = ""),
            width = 600, height = 600, pointsize = 12, quality = 90,
            bg = "white", res = NA)
    }
    if (plotnew) {
        # Fresh plot: axis limits come from the polygon itself.
        plot(r.MCP$MCP, xlab = xaxis, ylab = yaxis, colpol = fill.col,
            col = mcp.col, lwd = mcp.lwd)
    } else {
        # Overlay: reuse the current device's user coordinate limits so the
        # polygon lands on the existing plot without rescaling it.
        usr <- par(no.readonly = TRUE)$usr
        plot(r.MCP$MCP, xlab = xaxis, ylab = yaxis, colpol = fill.col,
            col = mcp.col, lwd = mcp.lwd, xlim = usr[1:2], ylim = usr[3:4],
            add = TRUE)
    }
    title(paste(titletxt, sep = ""))
    # Optionally draw the observations on top of the polygon.
    if (plotpoints) {
        points(r.MCP$points, col = points.col, pch = points.pch)
    }
    if (jpeg) {
        dev.off()
    }
}
#' Find points in harbour within specified range
#'
#' Method to find the gps positions given with tacsat data that are situated
#' within a range of a port.
#'
#' The method returns the index of points that are within a harbour area, given
#' the midpoints of the harbours and a range (in km) from these midpoints.
#'
#' @param lon Longitudinal positions of the TACSAT formatted data
#' @param lat Latitudinal positions of the TACSAT formatted data
#' @param harbours Latitudinal and Longitudinal position of the harbour and
#' outer range (km) from midpoint of harbour
#' @param rowSize Number of points processed per chunk (bounding-box
#' pre-filtering is done per chunk). Defaults to 30
#' @param returnNames Logical: return the name of the harbour instead of 1 / 0
#' indicating if it is inside or outside the harbour. Default to FALSE
#' @param saveHarbourList Logical: writing harbour list used in function to
#' file. Default to TRUE
#' @return Vector of the same length as lon: 1/0 flags, or harbour names with
#' NA outside harbours when returnNames=TRUE
#' @author Niels T. Hintzen
#' @seealso \code{\link{distance}}, \code{\link{lonLatRatio}},
#' \code{\link{sortTacsat}}, \code{\link{filterTacsat}},
#' \code{\link{mergeEflalo2Tacsat}}
#' @references EU lot 2 project
#' @examples
#'
#' data(eflalo)
#' data(tacsat)
#' data(euharbours); euharbours <- harbours
#'
#' #-Remove duplicated records from tacsat
#' myf <- paste(tacsat$VE_REF,tacsat$SI_LATI,tacsat$SI_LONG,
#' tacsat$SI_DATE,tacsat$SI_TIME);
#' tacsat <- tacsat[!duplicated(myf),];
#'
#' #-Find all the gps locations that are located within the port area
#' idx <- pointInHarbour(lon=tacsat$SI_LONG,lat=tacsat$SI_LATI,
#' harbours=harbours,returnNames=TRUE)
#' print(head(idx))
#' getwd() #in this directory, the harbour list will be written to disk
#' idx <- pointInHarbour(lon=tacsat$SI_LONG,lat=tacsat$SI_LATI,
#' harbours=harbours,saveHarbourList=TRUE)
#'
#' idx <- pointInHarbour(lon=tacsat$SI_LONG,lat=tacsat$SI_LATI,
#' harbours=harbours)
#' idx <- which(idx==1)
#'
#' #-Plot these port locations on a map
#' library(maps); library(mapdata)
#' #map the world, but plot only the northsea by lon and lat limits,
#' # in high resolution
#' xrange <- range(tacsat$SI_LONG[idx])
#' yrange <- range(tacsat$SI_LATI[idx])
#'
#' map('worldHires',xlim=xrange,ylim=yrange,col="darkgreen",fill=TRUE,
#' resolution=1, bg="white", border=0)
#' map.axes();
#' mtext("Longitude",1,line=-2,outer=TRUE,cex=1.2,font=2)
#' mtext("Latitude",2,line=-2,outer=TRUE,cex=1.2,font=2)
#'
#' points(tacsat$SI_LONG[idx],tacsat$SI_LATI[idx],cex=0.1,pch=16,col="red")
#'
#' @export pointInHarbour
pointInHarbour <- function(lon,lat,harbours,rowSize=30, returnNames=FALSE,saveHarbourList=TRUE){

    # Harbour midpoints and their detection radius (km), as a sortable matrix.
    xharb     <- harbours$lon
    yharb     <- harbours$lat
    rharb     <- harbours$range
    harb      <- cbind(xharb,yharb,rharb)
    # Harbour names become row names; 'harbour' wins over 'Description' if both exist.
    if("Description" %in% colnames(harbours)) rownames(harb) <- harbours$Description
    if("harbour" %in% colnames(harbours)) rownames(harb) <- harbours$harbour
    harb      <- orderBy(~xharb+yharb,data=harb)

    # Sort the pings by lon then lat; 'ordxys' is kept to restore the
    # original ordering at the end.
    xys       <- data.frame(lon,lat)
    ordxys    <- order(xys$lon,xys$lat)
    lon       <- lon[ordxys]
    lat       <- lat[ordxys]

    # Process the sorted pings in chunks of 'rowSize'; per chunk only the
    # harbours inside the chunk's (padded) bounding box are distance-checked.
    nChunks   <- ceiling(length(lon)/rowSize)
    store     <- rep(0, length(lon))
    for(chunks in 1:nChunks){
      # The final chunk runs to the end of the vector instead of a full rowSize.
      if(chunks == nChunks){
        x1 <- lon[(chunks*rowSize-rowSize+1):length(lon)]
        y1 <- lat[(chunks*rowSize-rowSize+1):length(lon)]
      } else {
        x1 <- lon[(chunks*rowSize-rowSize+1):(chunks*rowSize)]
        y1 <- lat[(chunks*rowSize-rowSize+1):(chunks*rowSize)]
      }

      # Bounding box of the chunk, padded by 0.05 degree on each side.
      xr <- range(x1,na.rm=TRUE); xr <- c(xr[1]-0.05,xr[2]+0.05)
      yr <- range(y1,na.rm=TRUE); yr <- c(yr[1]-0.05,yr[2]+0.05)
      res1 <- which(harb[,"xharb"] >= xr[1] & harb[,"xharb"] <= xr[2])
      res2 <- which(harb[,"yharb"] >= yr[1] & harb[,"yharb"] <= yr[2])
      # res3 = harbours matching both the lon and the lat window.
      res3 <- res1[which(is.na(pmatch(res1,res2))==FALSE)]

      if(length(res3)>0){
        for(hars in res3){
          #print(hars)
          x2 <- harb[hars,"xharb"]
          y2 <- harb[hars,"yharb"]

          # Haversine great-circle distance (km) from every ping in the
          # chunk to this harbour midpoint; R = Earth radius in km.
          pd <- pi/180

          a1 <- sin(((y2-y1)*pd)/2)
          a2 <- cos(y1*pd)
          a3 <- cos(y2*pd)
          a4 <- sin(((x2-x1)*pd)/2)
          a <- a1*a1+a2*a3*a4*a4

          c <- 2*atan2(sqrt(a),sqrt(1-a));
          R <- 6371;
          dx1 <- R*c

          # Flag pings within this harbour's radius.
          res <- numeric(length(x1))
          idx <- which(dx1<=harb[hars,"rharb"])
          res[idx] <- 1
          if(returnNames){
            res[idx] <- rownames(harb)[hars] # overwrite '1' with the port names
            # Only fill positions still "0" so the first matching harbour wins.
            if(chunks==nChunks){
              idx2 <- idx[which(store[(chunks*rowSize-rowSize+1):length(lon)][idx] == "0")]
              store[(chunks*rowSize-rowSize+1):length(lon)][idx2] <- res[idx2]
            } else {
              idx2 <- idx[which(store[(chunks*rowSize-rowSize+1):(chunks*rowSize)][idx] == "0")]
              store[(chunks*rowSize-rowSize+1):(chunks*rowSize)][idx2] <- res[idx2]
            }
          } else {
            # Numeric mode: accumulate hits (clamped to 1 after the loop).
            if(chunks==nChunks){
              store[(chunks*rowSize-rowSize+1):length(lon)] <- store[(chunks*rowSize-rowSize+1):length(lon)]+res
            } else {
              store[(chunks*rowSize-rowSize+1):(chunks*rowSize)] <- store[(chunks*rowSize-rowSize+1):(chunks*rowSize)]+res
            }
          }
        }
      }
    }
    if(returnNames == FALSE) store[which(store>0)] <- 1
    #Get order in tacsat back: sorted element i belongs at original position ordxys[i]
    store[ordxys] <- store

    if(returnNames) store <- replace(store, store=="0", NA)
    if(saveHarbourList) write.table(harbours,file="harbourList_pointInHarbour.txt",append=FALSE,sep="\t")

return(store)}
#' Find points on land given a set of coordinates
#'
#' Find the points that are on land given a set of coordinates and polygons
#' that determine the part that is land
#'
#' With many coordinates, the checking might take longer.
#'
#' @param tacsat Tacsat file (data.frame with SI_LONG and SI_LATI columns)
#' @param lands Polygon of area that is considered to be land; must be (or
#' extend) class 'SpatialPolygons' (e.g. a SpatialPolygonsDataFrame)
#' @param proj4string Projection string, default to NULL.
#' @return Returns a vector with values 0 and 1. 1 indicating points on land, 0
#' indicating points not on land.
#' @author Niels T. Hintzen
#' @seealso \code{\link{pointInHarbour}}
#' @references EU Lot 2 project
#' @examples
#'
#'
#' data(tacsat)
#' data(europa)
#' tacsat <- tacsat[1:1000,]
#' tacsat <- sortTacsat(tacsat)
#'
#' pols <- lonLat2SpatialPolygons(lst=lapply(as.list(sort(unique(europa$SID))),
#'                 function(x){data.frame(SI_LONG=subset(europa,SID==x)$X,
#'                                        SI_LATI=subset(europa,SID==x)$Y)}))
#' idx <- pointOnLand(tacsat,pols);
#' idx <- which(idx == 1)
#'
#' plotMap(europa,xlim=c(0,10),ylim=c(48,62))
#' points(tacsat$SI_LONG[idx],tacsat$SI_LATI[idx],col="red",cex=0.5,pch=19)
#'
#' @export pointOnLand
pointOnLand <- function(tacsat,lands,proj4string=NULL){
  # is() respects S4 inheritance, so subclasses such as
  # SpatialPolygonsDataFrame are accepted too (class(...) != "..." rejected them).
  if(!is(lands, "SpatialPolygons")) stop("'lands' must be specified as class 'SpatialPolygons'")

  # One 0/1 flag per tacsat record; defaults to 0 (not on land).
  totres  <- rep(0,length(tacsat$SI_LONG))

  if(is.null(proj4string)){
    #No projection string used: 'lands' must be unprojected as well.
    # (proj4string(lands) here calls sp's accessor, not the NULL argument.)
    if(!is.na(proj4string(lands))) stop("Projection defined for lands, use proj4string argument in function")
    spPoint <- SpatialPoints(data.frame(x=tacsat$SI_LONG,y=tacsat$SI_LATI))
  } else {
    #Use projection string for both the polygons and the points.
    proj4string(lands) <- proj4string
    spPoint <- SpatialPoints(data.frame(x=tacsat$SI_LONG,y=tacsat$SI_LATI),
                             proj4string=proj4string)
  }
  # over() returns NA for points outside every polygon; non-NA means on land.
  idx               <- over(spPoint,lands)
  totres[!is.na(idx)] <- 1
return(totres)}
get.sp <- function (nm) unlist(lapply(strsplit(nm, split="_"), function(x) x[3])) - nm <- names(eflalo) - idx.col.w <- grep('KG', nm) # index columns weight - idx.col.v <- grep('EURO', nm) # index columns value - v <- apply(eflalo [,idx.col.v], 2, sum, na.rm=TRUE) - sp.to.merge.v <- names(v)[v < threshold.value] # e.g. threshold in euros - sp.to.merge.w <- paste('LE_KG_', get.sp(sp.to.merge.v ), sep='') - sp.to.merge.w <- sp.to.merge.w[sp.to.merge.w != paste("LE_KG_", code, sep='')] - sp.to.merge.v <- sp.to.merge.v[sp.to.merge.v != paste("LE_EURO_", code, sep='')] - OTH.w <- apply(eflalo[,sp.to.merge.w], 1, sum, na.rm=TRUE) - OTH.v <- apply(eflalo[,sp.to.merge.v], 1, sum, na.rm=TRUE) - eflalo <- eflalo [, !nm %in% c(sp.to.merge.w, sp.to.merge.v)] - eflalo[,paste("LE_KG_", code, sep='')] <- replace(eflalo[,paste("LE_KG_", code, sep='')], - is.na(eflalo[,paste("LE_KG_", code, sep='')]), 0) - eflalo[,paste("LE_KG_", code, sep='')] <- eflalo[,paste("LE_KG_", code, sep='')] + OTH.w - eflalo[,paste("LE_EURO_", code, sep='')] <- replace(eflalo[,paste("LE_EURO_", code, sep='')], - is.na(eflalo[,paste("LE_EURO_", code, sep='')]), 0) - eflalo[,paste("LE_EURO_", code, sep='')] <- eflalo[,paste("LE_EURO_", code, sep='')] + OTH.v - return(eflalo) - } - \ No newline at end of file + # reduce the size of the eflalo data by merging species + # threshold in euros + + +#' merge species in eflalo to reduce the size of data according to a threshold +#' in euros +#' +#' merge species in eflalo to reduce the size of data according to a threshold +#' in euros +#' +#' +#' @param eflalo data.frame, eflalo format +#' @param threshold.value numeric, in euro +#' @param code character, a code for the species 'other' +#' @author Francois Bastardie +#' @export poolEflaloSpecies +poolEflaloSpecies <- function(eflalo, threshold.value=3000000, code="MZZ"){ + if(!paste("LE_KG_", code, sep='') %in% colnames(eflalo)) { + eflalo [, paste("LE_KG_", code, sep='')] <- NA + eflalo [, paste("LE_EURO_", 
code, sep='')] <- NA + } + get.sp <- function (nm) unlist(lapply(strsplit(nm, split="_"), function(x) x[3])) + nm <- names(eflalo) + idx.col.w <- grep('KG', nm) # index columns weight + idx.col.v <- grep('EURO', nm) # index columns value + v <- apply(eflalo [,idx.col.v], 2, sum, na.rm=TRUE) + sp.to.merge.v <- names(v)[v < threshold.value] # e.g. threshold in euros + sp.to.merge.w <- paste('LE_KG_', get.sp(sp.to.merge.v ), sep='') + sp.to.merge.w <- sp.to.merge.w[sp.to.merge.w != paste("LE_KG_", code, sep='')] + sp.to.merge.v <- sp.to.merge.v[sp.to.merge.v != paste("LE_EURO_", code, sep='')] + OTH.w <- apply(eflalo[,sp.to.merge.w], 1, sum, na.rm=TRUE) + OTH.v <- apply(eflalo[,sp.to.merge.v], 1, sum, na.rm=TRUE) + eflalo <- eflalo [, !nm %in% c(sp.to.merge.w, sp.to.merge.v)] + eflalo[,paste("LE_KG_", code, sep='')] <- replace(eflalo[,paste("LE_KG_", code, sep='')], + is.na(eflalo[,paste("LE_KG_", code, sep='')]), 0) + eflalo[,paste("LE_KG_", code, sep='')] <- eflalo[,paste("LE_KG_", code, sep='')] + OTH.w + eflalo[,paste("LE_EURO_", code, sep='')] <- replace(eflalo[,paste("LE_EURO_", code, sep='')], + is.na(eflalo[,paste("LE_EURO_", code, sep='')]), 0) + eflalo[,paste("LE_EURO_", code, sep='')] <- eflalo[,paste("LE_EURO_", code, sep='')] + OTH.v + return(eflalo) + } + diff --git a/vmstools/R/predictMetier.r b/vmstools/R/predictMetier.r index 1e1aab3..3fcdd64 100644 --- a/vmstools/R/predictMetier.r +++ b/vmstools/R/predictMetier.r @@ -1,55 +1,131 @@ -predictMetier=function(learningData=Step1,clustersAffectation=clust2007,newData=datPred){ - - # Select only the columns of newData corresponding to species of learningData - namesSpecies=colnames(learningData) - le_id_newData=rownames(newData) - newData=newData[,namesSpecies] - # Transform quantities of newData to percentages of each logevent totale catch - newData=transformation_proportion(newData) - - # Select the logevents without catch for the selected species - nullCatch=which(apply(newData,1,sum)==0) - 
#' Finding metiers for a new reduced EFLALO dataset from a reduced EFLALO
#' dataset for which the metiers are known.
#'
#' This function permits to determine the metiers of new logevents belonging to
#' a new reduced EFLALO dataset, from a reduced EFLALO dataset for which the
#' metiers are known. For example, this function permits to determine the
#' metiers of logevents realized in 2008, based on metiers of logevents
#' realized in 2007. It works with a discriminant analysis run with the
#' function fda. In the first time, the function fda calibrates a model based
#' on the dataset for which the metiers are known. In the second time, the
#' function predict is used to predict the metiers of new logevents, thanks to
#' the previous model.
#'
#'
#' @param learningData A numerical matrix with logevents as lines and species
#' as columns, with percentage values (between 0 and 100) of each species in
#' the logevent catches. Logevent ID (LE_ID) should be as row names. Typically,
#' this table will be produced from a eflalo dataset at the step 1 of the
#' metier analysis, using the function extractTableMainSpecies()
#' @param clustersAffectation An integer vector of length n, the number of
#' logevents in learningData, giving for each logevent the number of the
#' cluster to which it belongs. This vector will be produced at the step 3 of
#' the metier analysis, using the function getMetierClusters().
#' @param newData A numerical matrix with logevents as lines and species as
#' columns, with raw values of each species in the logevent catches. Logevent
#' ID (LE_ID) should be as row names.
#' @return A data frame giving the logevent ID (LE_ID) (first column) and the
#' number of the cluster to which it belongs (second column), for all logevents
#' in newData. The cluster number "0" corresponds to the unknown metier.
#' @note A number of libraries are initially called for the whole metier
#' analyses and must be installed :
#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda)
#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER,
#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark)
#' @seealso \code{\link{selectMainSpecies}},
#' \code{\link{extractTableMainSpecies}}, \code{\link{getTableAfterPCA}},
#' \code{\link{getMetierClusters}}
#' @references Development of tools for logbook and VMS data analysis. Studies
#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2
#' @examples
#'
#'
#' \dontrun{
#'
#' data(eflalo)
#'
#' # Format
#' newEflalo <- formatEflalo(eflalo)
#' newEflalo <- newEflalo[newEflalo$LE_GEAR=="OTB",]
#'
#' # Note that output plots will be sent to getwd()
#' analysisName <- "metier_analysis_OTB"
#'
#' # Prepare datPred
#' datPred <- newEflalo[,c("LE_ID",grep("EURO",names(newEflalo),value=TRUE))]
#' datPred[is.na(datPred)] <- 0
#' names(datPred)[-1] <- unlist(lapply(strsplit(names(datPred[,-1]),"_"),
#'                              function(x) x[[3]]))
#' le_id_datPred <- datPred[,"LE_ID"]
#' datPred <- datPred[,-1]
#' datPred <- as.matrix(datPred)
#' rownames(datPred) <- le_id_datPred
#'
#' if (methMetier=="hac") clustersAffectation <- Step3$clusters
#' if (methMetier=="clara") clustersAffectation <- Step3$clusters$clustering
#' if (methMetier=="kmeans") clustersAffectation <- Step3$clusters$cluster
#'
#' # Predict the metiers of the new logbook events of newData from the metiers
#' # found in Step 3
#' metierPred <- predictMetier(learningData=Step1,
#'                 clustersAffectation=clustersAffectation,newData=datPred)
#'
#' }
#'
#'
#' @export predictMetier
predictMetier=function(learningData=Step1,clustersAffectation=clust2007,newData=datPred){

  # Select only the columns of newData corresponding to species of learningData
  namesSpecies=colnames(learningData)
  le_id_newData=rownames(newData)
  newData=newData[,namesSpecies]
  # Transform quantities of newData to percentages of each logevent total catch
  newData=transformation_proportion(newData)

  # Select the logevents without catch for the selected species
  nullCatch=which(apply(newData,1,sum)==0)
  newDataWithoutCatch=newData[nullCatch,]
  # With a single null-catch row the subset above drops to a vector, so the
  # LE_ID must be recovered from the names of 'nullCatch' instead of rownames.
  if(length(nullCatch)==1){
    le_id_newDataWithoutCatch=names(nullCatch)
  }else{
    le_id_newDataWithoutCatch=rownames(newDataWithoutCatch)
  }

  # Select the logevents with catch for the selected species
  positiveCatch=setdiff(1:nrow(newData),nullCatch)
  newDataWithCatch=newData[positiveCatch,]

  # Prepare learningData for the discriminant analysis:
  # append the known cluster assignment as a factor column named "clust".
  le_id_learningData=rownames(learningData)
  nbSpeciesLearningData=ncol(learningData)   # Number of species of learningData
  nameLearningDataSpecies=colnames(learningData)
  learningData=as.data.frame(cbind(learningData,as.factor(clustersAffectation)))
  colnames(learningData)=c(colnames(learningData[1:nbSpeciesLearningData]),"clust")

  # Calibrate the model with learningData (flexible discriminant analysis, mda::fda)
  print("------------ Learning ------------")
  learning=fda(clust~.,data=learningData)

  # Predict the metier of each logevent in newDataWithCatch
  print("------------ Predicting ------------")
  result=predict(learning,newdata=newDataWithCatch)
  predictedClusters=data.frame(rownames(newDataWithCatch),result)
  dimnames(predictedClusters)[[2]]=c("LE_ID","Class")

  # Give the metier "0" (unknown metier) for the logevents in newDataWithoutCatch
  if(length(le_id_newDataWithoutCatch)!=0){
    notPredictedClusters=data.frame(le_id_newDataWithoutCatch,
        as.factor(rep(0,length(le_id_newDataWithoutCatch))))
    dimnames(notPredictedClusters)[[2]]=c("LE_ID","Class")
  }

  # Create a table linking "LE_ID" and metier for each logevent in newData
  if(length(le_id_newDataWithoutCatch)!=0){
    clustersForAllLogevents=rbind(predictedClusters,notPredictedClusters)
  }else{
    clustersForAllLogevents=predictedClusters
  }

  return(clustersForAllLogevents=clustersForAllLogevents)
}
use intervalTacsat)") - - #- Add date notation - if("SI_DAY" %in% unique(c(by))){ - eflalo$SI_DAY <- yday(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_DAY <- yday(tacsat$SI_DATIM) - } - if("SI_YEAR" %in% unique(c(by))){ - eflalo$SI_YEAR<- year(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_YEAR<- year(tacsat$SI_DATIM) - } - if("SI_MONTH"%in% unique(c(by))){ - eflalo$SI_MONTH<- month(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_MONTH<- month(tacsat$SI_DATIM) - } - if("SI_WEEK"%in% unique(c(by))){ - eflalo$SI_WEEK<- week(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_WEEK<- week(tacsat$SI_DATIM) - } - if("SI_QUARTER"%in% unique(c(by))){ - eflalo$SI_QUARTER<- quarter(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_QUARTER<- quarter(tacsat$SI_DATIM) - } - - #- Select time levels + order these - timePos<- c("SI_DAY","SI_WEEK","SI_MONTH","SI_QUARTER","SI_YEAR") - byTime <- by[which(by %in% timePos)] - byTime <- timePos[which(timePos %in% byTime)] - - #----------------------------------------------------------------------------- - #-Add spatial location - #----------------------------------------------------------------------------- - if("LE_RECT" %in% by) - tacsat$LE_RECT <- ICESrectangle(tacsat) - if("LE_ICESAREA" %in% by){ - data(ICESareas) - tacsat$LE_AREA <- ICESarea(tacsat,ICESareas) - tacsat$LE_AREA[which(is.na(tacsat$LE_AREA)==T)] <- "OTHER" - eflonlat <- ICESrectangle2LonLat(eflalo$LE_RECT) - eflalo$LE_AREA <- ICESarea(eflonlat,ICESareas) - eflalo$LE_AREA[which(is.na(eflalo$LE_AREA)==T)] <- "OTHER" - } - - #- Select area levels + order these - areaPos<- c("LE_RECT","LE_ICESAREA") - byArea <- by[which(by %in% areaPos)] - byArea <- areaPos[which(areaPos %in% byArea)] - - #----------------------------------------------------------------------------- - #- Calculate time possible to spend per day fishing - #----------------------------------------------------------------------------- - eflalo$LE_CDATIM <- 
as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y",tz="GMT") - eflalo$INTV <- c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")) - eflalo$FT_DURDAY <- ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$FT_DDATIM),units="hours") == 0), - c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")), - ifelse(c(difftime(as.Date(eflalo$FT_DDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), - c(difftime(eflalo$LE_CDATIM+(60*60*24),eflalo$FT_DDATIM,units="mins")), - ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), - c(difftime(eflalo$FT_LDATIM,eflalo$LE_CDATIM,units="mins")), - 1440))) - # Here there is still a problem because INTVDAY is calculated for catch days only, so you miss some effort of a whole trip - eflalo$dummy <- 1 - eflalo <- merge(eflalo,aggregate(eflalo$dummy,by=list(eflalo$FT_REF,eflalo$LE_CDATIM),FUN=sum,na.rm=T),by.x=c("FT_REF","LE_CDATIM"),by.y=c("Group.1","Group.2"),all.x=T) - colnames(eflalo)[length(colnames(eflalo))] <- "NR_FT_REF" - if("SI_DAY" %in% by){ - eflalo$INTVDAY <- eflalo$FT_DURDAY / eflalo$NR_FT_REF - } else { - eflalo$INTVDAY <- eflalo$INTV / eflalo$NR_FT_REF - } - eflalo <- eflalo[,-grep("dummy",colnames(eflalo))] - eflalo <- eflalo[,-grep("FT_DURDAY",colnames(eflalo))] - eflalo <- eflalo[,-grep("NR_FT_REF",colnames(eflalo))] - - #----------------------------------------------------------------------------- - #- Check if all colums in both tacsat and eflalo are available - #----------------------------------------------------------------------------- - if(length(which(!by %in% colnames(tacsat) | !by %in% colnames(eflalo)))>0) - stop("elements specified in 'by' are not available as columns in either tacsat or eflalo") - - #- If estimated proportion of fishing column is missing - if(!"PropFish" %in% colnames(eflalo)){ - eflalo$PropFish <- 1 - } - - #- Start to aggregate and raise tacsat, but drop elements through a for loop - # First aggregation without dropping any element - 
tacsat$INTVR <- 0 - - cat("Start minutes eflalo",sum(eflalo$INTVDAY * eflalo$PropFish,na.rm=T),"\n") - cat("Start minutes tacsat",sum(tacsat$INTV,na.rm=T),"\n") - - #- Order elements by uniqueness - if(sortBy){ - if(length(by)>1) - by <- names(sort(apply(eflalo[,by],2,function(x){length(unique(x))}))) - } - - #- Drop element by element and merge - byOrig <- by - INTVR <- numeric(nrow(tacsat)) - INTVDAY <- eflalo$INTVDAY - for(j in 0:(length(by)-1)){ - if(j != 0) - by <- rev(rev(byOrig)[-c(1:j)]) - unEf <- apply(unique(t(t(eflalo[,by]))),1,paste,collapse="_") - pEf <- apply(t(t(eflalo[,by])),1,paste,collapse="_") - pTa <- apply(t(t(tacsat[,by])),1,paste,collapse="_") - cat("Raising by",by,"\n") - for(i in 1:length(unEf)){ - idxT <- which(pTa %in% unEf[i]) - idxE <- which(pEf %in% unEf[i]) - if(length(idxT)>0 & length(idxE)>0){ - INTVR[idxT] <- sum(INTVDAY[idxE] * eflalo$PropFish[idxE],na.rm=T)/sum(tacsat$INTV[idxT],na.rm=T) * tacsat$INTV[idxT] + INTVR[idxT] - INTVDAY[idxE] <- 0 - } - } - } - tacsat$INTVR <- INTVR - eflalo$INTVDAY <- INTVDAY - - cat("Final minutes tacsat",sum(tacsat$INTVR,na.rm=T),"\n") - cat("final remaing minutes eflalo",sum(eflalo$INTVDAY,na.rm=T),"\n") - -return(tacsat)} - +#' Raise the effort in tacsat to the effort available in eflalo. +#' +#' The eflalo dataset contains a total amount of effort. This total amount can +#' be set as the total amount of effort to be found in tacsat too. This +#' function raises the total amount of effort in tacsat to that of eflalo. +#' +#' The function looks also for a 'PropFish' column. This allows users to scale +#' down the total effort in eflalo according to the proportion that is assumed +#' to be fishing effort. 
This helps to raise the tacsat data where only records +#' which have SI_STATE = 1 will be used +#' +#' @param tacsat Tacsat object +#' @param eflalo Eflalo object +#' @param by By can be: "SI_DAY", "SI_WEEK", "SI_MONTH", +#' "SI_YEAR","LE_RECT","LE_ICESAREA","SI_QUARTER", +#' @param sortBy Logical. If items in 'by' should be ordered by descending +#' uniqueness (if TRUE match between tacsat and eflalo increases) +#' @author Niels T. Hintzen +#' @seealso \code{\link{splitAmongPings}}, \code{\link{estimatePropFishing}} +#' @examples +#' +#' data(tacsat) +#' data(eflalo) +#' +#' #- Calculate effort in tacsat dataset and define activity (fishing) +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' tacsatp <- sortTacsat(tacsatp) +#' +#' #- Define effort on a trip level +#' tacsatp <- intervalTacsat(tacsatp,level="trip",fill.na=T,weight=c(0.5,0.5)) +#' tacsatp1<- subset(tacsatp,FT_REF==0) +#' tacsatp2<- subset(tacsatp,FT_REF!=0) +#' #- Define effort on a vessel level +#' tacsatp1<- intervalTacsat(tacsatp1,level="vessel",fill.na=T,weight=c(0.5,0.5)) +#' +#' #- Combine the two datasets with effort associated +#' tacsatp <- rbind(tacsatp1,tacsatp2) +#' tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)] +#' +#' #- Quick and dirty approach to defin activity +#' tacsatp <- filterTacsat(tacsatp,st=c(4,8)) +#' tacsatp$INTV[which(tacsatp$INTV>232)] <- 115 +#' tacsatp$INTV[which(is.na(tacsatp$INTV)==T)] <- 115 +#' tacsat <- tacsatp +#' tacsat$SI_STATE <- 1 +#' +#' #- Estimate the proportion fishing first (not necessary) +#' eflalo <- estimatePropFishing(tacsat,eflalo,by=c("VE_REF","FT_REF")) +#' +#' #- Raise the tacsat dataset to the effort of the eflalo dataset +#' tacsat <- raiseTacsat(tacsat,eflalo,by=c("LE_GEAR","SI_DAY","SI_YEAR"),sortBy=T) +#' tacsat <- raiseTacsat(tacsat,eflalo,by=c("LE_GEAR","SI_DAY","SI_YEAR"),sortBy=F) +#' +#' +#' @export raiseTacsat +raiseTacsat <- 
function(tacsat,eflalo,by=c("LE_GEAR","VE_REF","SI_DAY","LE_RECT","SI_YEAR"),sortBy=TRUE){ + + + #----------------------------------------------------------------------------- + #- Add columns on dates + #----------------------------------------------------------------------------- + if(!"SI_DATIM" %in% colnames(tacsat)) + tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME,sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") + if(!"FT_DDATIM" %in% colnames(eflalo)) + eflalo$FT_DDATIM <- as.POSIXct(paste(eflalo$FT_DDAT,eflalo$FT_DTIME),format="%d/%m/%Y %H:%M",tz = "GMT") + if(!"FT_LDATIM" %in% colnames(eflalo)) + eflalo$FT_LDATIM <- as.POSIXct(paste(eflalo$FT_LDAT,eflalo$FT_LTIME),format="%d/%m/%Y %H:%M",tz = "GMT") + if(!"INTV" %in% colnames(tacsat)) + stop("Specify time interval column in tacsat (e.g. use intervalTacsat)") + + #- Add date notation + if("SI_DAY" %in% unique(c(by))){ + eflalo$SI_DAY <- yday(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_DAY <- yday(tacsat$SI_DATIM) + } + if("SI_YEAR" %in% unique(c(by))){ + eflalo$SI_YEAR<- year(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_YEAR<- year(tacsat$SI_DATIM) + } + if("SI_MONTH"%in% unique(c(by))){ + eflalo$SI_MONTH<- month(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_MONTH<- month(tacsat$SI_DATIM) + } + if("SI_WEEK"%in% unique(c(by))){ + eflalo$SI_WEEK<- week(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_WEEK<- week(tacsat$SI_DATIM) + } + if("SI_QUARTER"%in% unique(c(by))){ + eflalo$SI_QUARTER<- quarter(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_QUARTER<- quarter(tacsat$SI_DATIM) + } + + #- Select time levels + order these + timePos<- c("SI_DAY","SI_WEEK","SI_MONTH","SI_QUARTER","SI_YEAR") + byTime <- by[which(by %in% timePos)] + byTime <- timePos[which(timePos %in% byTime)] + + #----------------------------------------------------------------------------- + #-Add spatial location + 
#----------------------------------------------------------------------------- + if("LE_RECT" %in% by) + tacsat$LE_RECT <- ICESrectangle(tacsat) + if("LE_ICESAREA" %in% by){ + data(ICESareas) + tacsat$LE_AREA <- ICESarea(tacsat,ICESareas) + tacsat$LE_AREA[which(is.na(tacsat$LE_AREA)==T)] <- "OTHER" + eflonlat <- ICESrectangle2LonLat(eflalo$LE_RECT) + eflalo$LE_AREA <- ICESarea(eflonlat,ICESareas) + eflalo$LE_AREA[which(is.na(eflalo$LE_AREA)==T)] <- "OTHER" + } + + #- Select area levels + order these + areaPos<- c("LE_RECT","LE_ICESAREA") + byArea <- by[which(by %in% areaPos)] + byArea <- areaPos[which(areaPos %in% byArea)] + + #----------------------------------------------------------------------------- + #- Calculate time possible to spend per day fishing + #----------------------------------------------------------------------------- + eflalo$LE_CDATIM <- as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y",tz="GMT") + eflalo$INTV <- c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")) + eflalo$FT_DURDAY <- ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$FT_DDATIM),units="hours") == 0), + c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")), + ifelse(c(difftime(as.Date(eflalo$FT_DDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), + c(difftime(eflalo$LE_CDATIM+(60*60*24),eflalo$FT_DDATIM,units="mins")), + ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), + c(difftime(eflalo$FT_LDATIM,eflalo$LE_CDATIM,units="mins")), + 1440))) + # Here there is still a problem because INTVDAY is calculated for catch days only, so you miss some effort of a whole trip + eflalo$dummy <- 1 + eflalo <- merge(eflalo,aggregate(eflalo$dummy,by=list(eflalo$FT_REF,eflalo$LE_CDATIM),FUN=sum,na.rm=T),by.x=c("FT_REF","LE_CDATIM"),by.y=c("Group.1","Group.2"),all.x=T) + colnames(eflalo)[length(colnames(eflalo))] <- "NR_FT_REF" + if("SI_DAY" %in% by){ + eflalo$INTVDAY <- eflalo$FT_DURDAY / eflalo$NR_FT_REF + } else { + eflalo$INTVDAY <- 
eflalo$INTV / eflalo$NR_FT_REF + } + eflalo <- eflalo[,-grep("dummy",colnames(eflalo))] + eflalo <- eflalo[,-grep("FT_DURDAY",colnames(eflalo))] + eflalo <- eflalo[,-grep("NR_FT_REF",colnames(eflalo))] + + #----------------------------------------------------------------------------- + #- Check if all colums in both tacsat and eflalo are available + #----------------------------------------------------------------------------- + if(length(which(!by %in% colnames(tacsat) | !by %in% colnames(eflalo)))>0) + stop("elements specified in 'by' are not available as columns in either tacsat or eflalo") + + #- If estimated proportion of fishing column is missing + if(!"PropFish" %in% colnames(eflalo)){ + eflalo$PropFish <- 1 + } + + #- Start to aggregate and raise tacsat, but drop elements through a for loop + # First aggregation without dropping any element + tacsat$INTVR <- 0 + + cat("Start minutes eflalo",sum(eflalo$INTVDAY * eflalo$PropFish,na.rm=T),"\n") + cat("Start minutes tacsat",sum(tacsat$INTV,na.rm=T),"\n") + + #- Order elements by uniqueness + if(sortBy){ + if(length(by)>1) + by <- names(sort(apply(eflalo[,by],2,function(x){length(unique(x))}))) + } + + #- Drop element by element and merge + byOrig <- by + INTVR <- numeric(nrow(tacsat)) + INTVDAY <- eflalo$INTVDAY + for(j in 0:(length(by)-1)){ + if(j != 0) + by <- rev(rev(byOrig)[-c(1:j)]) + unEf <- apply(unique(t(t(eflalo[,by]))),1,paste,collapse="_") + pEf <- apply(t(t(eflalo[,by])),1,paste,collapse="_") + pTa <- apply(t(t(tacsat[,by])),1,paste,collapse="_") + cat("Raising by",by,"\n") + for(i in 1:length(unEf)){ + idxT <- which(pTa %in% unEf[i]) + idxE <- which(pEf %in% unEf[i]) + if(length(idxT)>0 & length(idxE)>0){ + INTVR[idxT] <- sum(INTVDAY[idxE] * eflalo$PropFish[idxE],na.rm=T)/sum(tacsat$INTV[idxT],na.rm=T) * tacsat$INTV[idxT] + INTVR[idxT] + INTVDAY[idxE] <- 0 + } + } + } + tacsat$INTVR <- INTVR + eflalo$INTVDAY <- INTVDAY + + cat("Final minutes tacsat",sum(tacsat$INTVR,na.rm=T),"\n") + cat("final 
remaing minutes eflalo",sum(eflalo$INTVDAY,na.rm=T),"\n") + +return(tacsat)} + diff --git a/vmstools/R/rbindEflalo.r b/vmstools/R/rbindEflalo.r index c84af0f..c091e38 100644 --- a/vmstools/R/rbindEflalo.r +++ b/vmstools/R/rbindEflalo.r @@ -1,20 +1,41 @@ -rbindEflalo <- function(set1,set2){ - cln1 <- colnames(set1) - cln2 <- colnames(set2) - if(any(duplicated(cln1)==TRUE) || any(duplicated(cln2)==TRUE)) stop("Duplicate column names in datasets") - idx1 <- which(is.na(pmatch(cln1,cln2))==TRUE) - idx2 <- which(is.na(pmatch(cln2,cln1))==TRUE) - - if(length(idx1)>0){ - for(i in idx1) set2 <- cbind(set2,NA) - colnames(set2) <- c(cln2,cln1[idx1])} - if(length(idx2)>0){ - for(i in idx2) set1 <- cbind(set1,NA) - colnames(set1) <- c(cln1,cln2[idx2])} - cln1 <- colnames(set1) - cln2 <- colnames(set2) - mtch <- pmatch(cln1,cln2) - if(any(is.na(mtch))==TRUE) stop("Cannot find nor create all matching column names") - set3 <- rbind(set1,set2[,cln2[mtch]]) -return(set3)} - +#' Row bind two eflalo sets together +#' +#' Row binds two eflalo sets together while taking differences in column names +#' into account +#' +#' +#' @param set1 Eflalo set 1 +#' @param set2 Eflalo set 2 +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{rbindTacsat}}, \code{\link{do.call}} +#' @references EU Lot 2 project +#' @examples +#' +#' data(eflalo) +#' set1 <- eflalo +#' set2 <- eflalo[seq(1,100,5),] +#' +#' combined <- rbindEflalo(set1,set2) +#' +#' +#' @export rbindEflalo +rbindEflalo <- function(set1,set2){ + cln1 <- colnames(set1) + cln2 <- colnames(set2) + if(any(duplicated(cln1)==TRUE) || any(duplicated(cln2)==TRUE)) stop("Duplicate column names in datasets") + idx1 <- which(is.na(pmatch(cln1,cln2))==TRUE) + idx2 <- which(is.na(pmatch(cln2,cln1))==TRUE) + + if(length(idx1)>0){ + for(i in idx1) set2 <- cbind(set2,NA) + colnames(set2) <- c(cln2,cln1[idx1])} + if(length(idx2)>0){ + for(i in idx2) set1 <- cbind(set1,NA) + colnames(set1) <- c(cln1,cln2[idx2])} + cln1 <- colnames(set1) + cln2 <- colnames(set2) + mtch <- pmatch(cln1,cln2) + if(any(is.na(mtch))==TRUE) stop("Cannot find nor create all matching column names") + set3 <- rbind(set1,set2[,cln2[mtch]]) +return(set3)} + diff --git a/vmstools/R/rbindTacsat.r b/vmstools/R/rbindTacsat.r index 5310e4a..c19e5c3 100644 --- a/vmstools/R/rbindTacsat.r +++ b/vmstools/R/rbindTacsat.r @@ -1,23 +1,44 @@ -rbindTacsat <- function(set1,set2){ - cln1 <- colnames(set1) - cln2 <- colnames(set2) - if(any(duplicated(cln1)==TRUE) || any(duplicated(cln2)==TRUE)) stop("Duplicate column names in datasets") - idx1 <- which(is.na(pmatch(cln1,cln2))==TRUE) - idx2 <- which(is.na(pmatch(cln2,cln1))==TRUE) - - if(length(idx1)>0){ - for(i in idx1) set2 <- cbind(set2,NA) - colnames(set2) <- c(cln2,cln1[idx1])} - if(length(idx2)>0){ - for(i in idx2) set1 <- cbind(set1,NA) - colnames(set1) <- c(cln1,cln2[idx2])} - cln1 <- colnames(set1) - cln2 <- colnames(set2) - mtch <- pmatch(cln1,cln2) - if(any(is.na(mtch))==TRUE) stop("Cannot find nor create all matching column names") - set3 <- rbind(set1,set2[,cln2[mtch]]) -return(set3)} - - - - +#' Row bind two tacsat sets together +#' +#' Row binds two tacsat sets together while taking differences in column 
names +#' into account +#' +#' +#' @param set1 Tacsat set 1 +#' @param set2 Tacsat set 2 +#' @author Niels T. Hintzen +#' @seealso \code{\link{rbindEflalo}}, \code{\link{do.call}} +#' @references EU Lot 2 project +#' @examples +#' +#' data(tacsat) +#' set1 <- tacsat +#' set2 <- tacsat[seq(1,100,5),] +#' +#' combined <- rbindTacsat(set1,set2) +#' +#' +#' @export rbindTacsat +rbindTacsat <- function(set1,set2){ + cln1 <- colnames(set1) + cln2 <- colnames(set2) + if(any(duplicated(cln1)==TRUE) || any(duplicated(cln2)==TRUE)) stop("Duplicate column names in datasets") + idx1 <- which(is.na(pmatch(cln1,cln2))==TRUE) + idx2 <- which(is.na(pmatch(cln2,cln1))==TRUE) + + if(length(idx1)>0){ + for(i in idx1) set2 <- cbind(set2,NA) + colnames(set2) <- c(cln2,cln1[idx1])} + if(length(idx2)>0){ + for(i in idx2) set1 <- cbind(set1,NA) + colnames(set1) <- c(cln1,cln2[idx2])} + cln1 <- colnames(set1) + cln2 <- colnames(set2) + mtch <- pmatch(cln1,cln2) + if(any(is.na(mtch))==TRUE) stop("Cannot find nor create all matching column names") + set3 <- rbind(set1,set2[,cln2[mtch]]) +return(set3)} + + + + diff --git a/vmstools/R/readEflalo.r b/vmstools/R/readEflalo.r index 03a15e3..742e86e 100644 --- a/vmstools/R/readEflalo.r +++ b/vmstools/R/readEflalo.r @@ -1,20 +1,47 @@ -readEflalo <- function(file,sep=",",dec="."){ - - #Read the data - res <- read.table(file, header = TRUE,sep,dec = ".",stringsAsFactors = FALSE) - - #Perform checks - if(any(!c("VE_REF","VE_FLT","VE_COU","VE_LEN","VE_KW","VE_TON","FT_REF","FT_DCOU","FT_DHAR","FT_DDAT","FT_DTIME", - "FT_LCOU","FT_LHAR","FT_LDAT","FT_LTIME","LE_ID","LE_CDAT","LE_STIME","LE_ETIME","LE_SLAT","LE_SLON", - "LE_ELON","LE_ELON","LE_GEAR","LE_MSZ") %in% colnames(res))) - stop(paste(file,"needs correct header including",paste(c("VE_REF","VE_FLT","VE_COU","VE_LEN","VE_KW","VE_TON","FT_REF","FT_DCOU","FT_DHAR","FT_DDAT","FT_DTIME", - "FT_LCOU","FT_LHAR","FT_LDAT","FT_LTIME","LE_ID","LE_CDAT","LE_STIME","LE_ETIME","LE_SLAT","LE_SLON", - 
"LE_ELON","LE_ELON","LE_GEAR","LE_MSZ")))) - if(length(grep("KG",colnames(res))) < 1) stop("Units of landings are not in KG") - if(length(grep("LE_KG",colnames(res))) < 1) stop("No landing data provided") - if(length(grep("LE_EURO",colnames(res))) < 1) warning("Currency used is different than 'EURO'") - - #Reformat the data - res <- formatEflalo(res) - - return(res)} \ No newline at end of file +#' Read Eflalo data into R +#' +#' Reading eflalo data from delimited file into R, checking for obligatory +#' columns and formatting all data columns. +#' +#' +#' @param file file path + file name +#' @param sep delimiter used in file (default to ',') +#' @param dec decimal notation used in file (default to '.') +#' @return Returns the formatted Eflalo dataset +#' @author Niels T. Hintzen +#' @seealso \code{\link{readTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(eflalo) +#' dir.create("C:/tmpEflalo") +#' +#' #temporarily write tacsat file to disk to thereafter read it back in again +#' write.table(eflalo, file = "C:/tmpEflalo/eflalo.csv", quote = TRUE, sep = ",", +#' eol = "\n", na = "NA", dec = ".", row.names = FALSE,col.names = TRUE) +#' +#' #Read in tacsat file +#' eflalo <- readEflalo("C:/tmpEflalo/eflalo.csv") +#' +#' +#' @export readEflalo +readEflalo <- function(file,sep=",",dec="."){ + + #Read the data + res <- read.table(file, header = TRUE,sep,dec = ".",stringsAsFactors = FALSE) + + #Perform checks + if(any(!c("VE_REF","VE_FLT","VE_COU","VE_LEN","VE_KW","VE_TON","FT_REF","FT_DCOU","FT_DHAR","FT_DDAT","FT_DTIME", + "FT_LCOU","FT_LHAR","FT_LDAT","FT_LTIME","LE_ID","LE_CDAT","LE_STIME","LE_ETIME","LE_SLAT","LE_SLON", + "LE_ELON","LE_ELON","LE_GEAR","LE_MSZ") %in% colnames(res))) + stop(paste(file,"needs correct header including",paste(c("VE_REF","VE_FLT","VE_COU","VE_LEN","VE_KW","VE_TON","FT_REF","FT_DCOU","FT_DHAR","FT_DDAT","FT_DTIME", + "FT_LCOU","FT_LHAR","FT_LDAT","FT_LTIME","LE_ID","LE_CDAT","LE_STIME","LE_ETIME","LE_SLAT","LE_SLON", + 
"LE_ELON","LE_ELON","LE_GEAR","LE_MSZ")))) + if(length(grep("KG",colnames(res))) < 1) stop("Units of landings are not in KG") + if(length(grep("LE_KG",colnames(res))) < 1) stop("No landing data provided") + if(length(grep("LE_EURO",colnames(res))) < 1) warning("Currency used is different than 'EURO'") + + #Reformat the data + res <- formatEflalo(res) + + return(res)} diff --git a/vmstools/R/readTacsat.r b/vmstools/R/readTacsat.r index 9c1f18d..44384ca 100644 --- a/vmstools/R/readTacsat.r +++ b/vmstools/R/readTacsat.r @@ -1,14 +1,45 @@ -readTacsat <- function(file,sep=",",dec="."){ - - #Read the data - res <- read.table(file, header = TRUE,sep,dec = ".",stringsAsFactors = FALSE) - - #Perform checks - if(any(!c("VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE") %in% colnames(res))) stop(paste(file,"needs correct header including",paste("VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE"))) - if(any(res$SI_LATI > 90 | res$SI_LATI < -90,na.rm=TRUE) | any(res$SI_LONG > 180 | res$SI_LONG < -180,na.rm=TRUE)) stop("Longitudes or latitudes are out of range") - if(any(res$SI_HE > 360 | res$SI_HE < 0,na.rm=TRUE)) stop("Heading out of range, must be between 0 - 360") - - #Reformat the data - res <- formatTacsat(res) - - return(res)} \ No newline at end of file +#' Read Tacsat data into R +#' +#' Reading tacsat data from delimited file into R, checking for obligatory +#' columns and formatting all data columns. +#' +#' +#' @param file file path + file name +#' @param sep delimiter used in file (default to ',') +#' @param dec decimal notation used in file (default to '.') +#' @return Returns the formatted Tacsat dataset +#' @author Niels T. 
Hintzen +#' @seealso \code{readEflalo()} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' dir.create("C:/tmpTacsat") +#' +#' #temporarily write tacsat file to disk to thereafter read it back in again +#' tacsat$SI_HE[which(tacsat$SI_HE>360 | tacsat$SI_HE < 0)] <- NA +#' tacsat$SI_LONG[which(tacsat$SI_LONG < -180 | tacsat$SI_LONG > 180)] <- NA +#' tacsat$SI_LATI[which(tacsat$SI_LATI < -90 | tacsat$SI_LATI > 90)] <- NA +#' write.table(tacsat, file = "C:/tmpTacsat/tacsat.csv", quote = TRUE, sep = ",", +#' eol = "\n", na = "NA", dec = ".", row.names = FALSE,col.names = TRUE) +#' +#' #Read in tacsat file +#' tacsat <- readTacsat("C:/tmpTacsat/tacsat.csv") +#' +#' +#' +#' @export readTacsat +readTacsat <- function(file,sep=",",dec="."){ + + #Read the data + res <- read.table(file, header = TRUE,sep,dec = ".",stringsAsFactors = FALSE) + + #Perform checks + if(any(!c("VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE") %in% colnames(res))) stop(paste(file,"needs correct header including",paste("VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE"))) + if(any(res$SI_LATI > 90 | res$SI_LATI < -90,na.rm=TRUE) | any(res$SI_LONG > 180 | res$SI_LONG < -180,na.rm=TRUE)) stop("Longitudes or latitudes are out of range") + if(any(res$SI_HE > 360 | res$SI_HE < 0,na.rm=TRUE)) stop("Heading out of range, must be between 0 - 360") + + #Reformat the data + res <- formatTacsat(res) + + return(res)} diff --git a/vmstools/R/scree.r b/vmstools/R/scree.r index 95d4fee..afc146d 100644 --- a/vmstools/R/scree.r +++ b/vmstools/R/scree.r @@ -1,17 +1,45 @@ -################################### -# Implementation of "scree-test" # -################################### - -scree=function(eig){ - n=length(eig) - delta=numeric(n) - epsilon=numeric(n) - delta[2]=eig[2]-eig[1] - for (i in 3:n){ - delta[i]=eig[i]-eig[i-1] - epsilon[i]=delta[i]-delta[i-1] - } - data=matrix(0,nrow=n,ncol=3) - data=cbind(valeurs_propres=eig, delta=delta, epsilon=epsilon) - 
return(data) -} +################################### +# Implementation of "scree-test" # +################################### + + + +#' Useful functions for the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. +#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export scree +scree=function(eig){ + n=length(eig) + delta=numeric(n) + epsilon=numeric(n) + delta[2]=eig[2]-eig[1] + for (i in 3:n){ + delta[i]=eig[i]-eig[i-1] + epsilon[i]=delta[i]-delta[i-1] + } + data=matrix(0,nrow=n,ncol=3) + data=cbind(valeurs_propres=eig, delta=delta, epsilon=epsilon) + return(data) +} diff --git a/vmstools/R/segmentTacsatSpeed.r b/vmstools/R/segmentTacsatSpeed.r index 6c0850b..e25a8c8 100644 --- a/vmstools/R/segmentTacsatSpeed.r +++ b/vmstools/R/segmentTacsatSpeed.r @@ -1,417 +1,488 @@ -# do the distinction between fishing and non-fishing -# by automatic detection of the fishing peak -# adapted from library 'segmented' -# F. Bastardie - -segmentTacsatSpeed <- function(tacsat, - vessels= unique(tacsat$VE_REF), - force.lower.bound=0.5, - gears.to.force= c('GNS','GND','GNC','GNF','GTR','GTN','GEN','GN','SDN','SSC'), - general=list( - output.path=file.path('C:','output'), - visual.check=TRUE, - a.year=2009, - what.speed="calculated", - ), - ...){ - - - lstargs <- as.list( sys.call() ) # deprecated - - # checks - if(!'SI_STATE' %in% colnames(tacsat)) tacsat$SI_STATE <- NA - if(!'idx' %in% colnames(tacsat)) tacsat$idx <- 1:nrow(tacsat) - cat("if it still doesn't exist, 'results' folder is created in ",general$output.path,"\n") - dir.create(general$output.path, showWarnings = TRUE, recursive = TRUE, mode = "0777") - - # add - tacsat$bound1 <- NA - tacsat$bound2 <- NA - - - # utils--- - distAB.f <- function(A,B, .unit="km"){ - # return the dist in km or nautical miles. - # coord input: e.g. 
A[54,10] means [54 N, 10 E] - # formula = cos (gamma) = sin LatA sin LatB + cos LatA cos LatB cos delta - # if gamma in degree, knowing that 1 degree = 60 minutes = 60 nm, then gamma in degree * 60 = dist in nm - # we can also use the radius R of the earth: the length of the arc AB is given by R*gamma with gamma in radians - p = 180/pi; Rearth = 6378.388 # in km - nm <- 1852e-3 # 1852 m for 1 nm - res <- switch(.unit, - km = Rearth * acos(sin(A[,1]/p)*sin(B[,1]/p) + (cos(A[,1]/p) * cos(B[,1]/p)*cos(A[,2]/p - B[,2]/p))), - nm = Rearth * acos(sin(A[,1]/p)*sin(B[,1]/p) + (cos(A[,1]/p) * cos(B[,1]/p)*cos(A[,2]/p - B[,2]/p))) / nm - ) - names(res) <- NULL - return(res) - } - - # utils--- - # library(segmented) - # from library(segmented) # getS3method("segmented", "lm") - ## CAUTION: UNFORTUNATELY, THE FUNCTION NEED TO BE PLACED HERE BECAUSE A PROBLEM OF ENV - seg.control <- function (toll = 1e-04, it.max = 20, display = FALSE, last = TRUE, - maxit.glm = 25, h = 1){ - list(toll = toll, it.max = it.max, visual = display, last = last, - maxit.glm = maxit.glm, h = h) - } - segmented <- function (obj, seg.Z, psi, control = seg.control(), model.frame = TRUE, - ...){ - it.max <- old.it.max <- control$it.max - toll <- control$toll - visual <- control$visual - last <- control$last - h <- min(abs(control$h), 1) - if (h < 1) - it.max <- it.max + round(it.max/2) - objframe <- update(obj, model = TRUE, x = TRUE, y = TRUE) - y <- objframe$y - a <- model.matrix(seg.Z, data = eval(obj$call$data)) - a <- subset(a, select = colnames(a)[-1]) - n <- nrow(a) - Z <- list() - for (i in colnames(a)) Z[[length(Z) + 1]] <- a[, i] - name.Z <- names(Z) <- colnames(a) - if (length(Z) == 1 && is.vector(psi) && is.numeric(psi)) { - psi <- list(as.numeric(psi)) - names(psi) <- name.Z - } - if (!is.list(Z) || !is.list(psi) || is.null(names(Z)) || - is.null(names(psi))) - stop("Z and psi have to be *named* list") - nomiZpsi <- match(names(Z), names(psi)) - if (!identical(length(Z), length(psi)) || 
any(is.na(nomiZpsi))) - stop("Length or names of Z and psi do not match") - dd <- match(names(Z), names(psi)) - nome <- names(psi)[dd] - psi <- psi[nome] - a <- sapply(psi, length) - b <- rep(1:length(a), times = a) - Znew <- list() - for (i in 1:length(psi)) Znew[[length(Znew) + 1]] <- rep(Z[i], - a[i]) - Z <- matrix(unlist(Znew), nrow = n) - colnames(Z) <- rep(nome, a) - psi <- unlist(psi) - k <- ncol(Z) - PSI <- matrix(rep(psi, rep(n, k)), ncol = k) - nomiZ <- rep(nome, times = a) - ripetizioni <- as.numeric(unlist(sapply(table(nomiZ)[order(unique(nomiZ))], - function(xxx) { - 1:xxx - }))) - nomiU <- paste("U", ripetizioni, sep = "") - nomiU <- paste(nomiU, nomiZ, sep = ".") - KK <- new.env() - for (i in 1:ncol(objframe$model)) assign(names(objframe$model[i]), - objframe$model[[i]], envir = KK) - if (it.max == 0) { - U <- pmax((Z - PSI), 0) - colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") - nomiU <- paste("U", colnames(U), sep = "") - for (i in 1:ncol(U)) assign(nomiU[i], U[, i], envir = KK) - Fo <- update.formula(formula(obj), as.formula(paste(".~.+", - paste(nomiU, collapse = "+")))) - obj <- update(obj, formula = Fo, data = KK) - if (model.frame) - obj$model <- data.frame(as.list(KK)) - obj$psi <- psi - return(obj) - } - XREG <- model.matrix(obj) - o <- model.offset(objframe) - w <- model.weights(objframe) - if (is.null(w)) - w <- rep(1, n) - if (is.null(o)) - o <- rep(0, n) - initial <- psi - it <- 1 - epsilon <- 10 - obj0 <- obj - list.obj <- list(obj) - psi.values <- NULL - rangeZ <- apply(Z, 2, range) - H <- 1 - while (abs(epsilon) > toll) { - U <- pmax((Z - PSI), 0) - V <- ifelse((Z > PSI), -1, 0) - dev.old <- sum(obj$residuals^2) - - X <- cbind(XREG, U, V) - rownames(X) <- NULL - if (ncol(V) == 1) - colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c("U", "V") - else colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c(paste("U", - 1:k, sep = ""), paste("V", 1:k, sep = "")) - obj <- lm.wfit(x = X, y = y, w = w, offset = o) - dev.new <- sum(obj$residuals^2) - if 
(visual) { - if (it == 1) - cat(0, " ", formatC(dev.old, 3, format = "f"), - "", "(No breakpoint(s))", "\n") - spp <- if (it < 10) - "" - else NULL - cat(it, spp, "", formatC(dev.new, 3, format = "f"), - "\n") - } - epsilon <- (dev.new - dev.old)/dev.old - obj$epsilon <- epsilon - it <- it + 1 - obj$it <- it - class(obj) <- c("segmented", class(obj)) - list.obj[[length(list.obj) + ifelse(last == TRUE, 0, - 1)]] <- obj - if (k == 1) { - beta.c <- coef(obj)["U"] - gamma.c <- coef(obj)["V"] - } - else { - beta.c <- coef(obj)[paste("U", 1:k, sep = "")] - gamma.c <- coef(obj)[paste("V", 1:k, sep = "")] - } - if (it > it.max) - break - psi.values[[length(psi.values) + 1]] <- psi.old <- psi - if (it >= old.it.max && h < 1) - H <- h - psi <- psi.old + H * gamma.c/beta.c - PSI <- matrix(rep(psi, rep(nrow(Z), ncol(Z))), ncol = ncol(Z)) - a <- apply((Z < PSI), 2, all) - b <- apply((Z > PSI), 2, all) - if (sum(a + b) != 0 || is.na(sum(a + b))) - stop("(Some) estimated psi out of its range") - obj$psi <- psi - } - psi.values[[length(psi.values) + 1]] <- psi - id.warn <- FALSE - if (it > it.max) { - warning("max number of iterations attained", call. 
= FALSE) - id.warn <- TRUE - } - Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) - colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") - colnames(Vxb) <- paste(ripetizioni, nomiZ, sep = ".") - nomiU <- paste("U", colnames(U), sep = "") - nomiVxb <- paste("psi", colnames(Vxb), sep = "") - for (i in 1:ncol(U)) { - assign(nomiU[i], U[, i], envir = KK) - assign(nomiVxb[i], Vxb[, i], envir = KK) - } - nnomi <- c(nomiU, nomiVxb) - Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", - paste(nnomi, collapse = "+")))) - objF <- update(obj0, formula = Fo, data = KK) - Cov <- vcov(objF) - id <- match(paste("psi", colnames(Vxb), sep = ""), names(coef(objF))) - vv <- if (length(id) == 1) - Cov[id, id] - else diag(Cov[id, id]) - psi <- cbind(initial, psi, sqrt(vv)) - rownames(psi) <- colnames(Cov)[id] - colnames(psi) <- c("Initial", "Est.", "St.Err") - objF$rangeZ <- rangeZ - objF$psi.history <- psi.values - objF$psi <- psi - objF$it <- (it - 1) - objF$epsilon <- epsilon - objF$call <- match.call() - objF$nameUV <- list(U = paste("U", colnames(Vxb), sep = ""), - V = rownames(psi), Z = name.Z) - objF$id.warn <- id.warn - if (model.frame) - objF$mframe <- data.frame(as.list(KK)) - class(objF) <- c("segmented", class(obj0)) - list.obj[[length(list.obj) + 1]] <- objF - class(list.obj) <- "segmented" - if (last) - list.obj <- list.obj[[length(list.obj)]] - return(list.obj) - } - - - # hereafter, the core code... 
- # A FOR-LOOP BY VESSEL-------------------- - for(a.vesselid in vessels){ - tacsat.this.vessel <- tacsat[tacsat$VE_REF %in% a.vesselid, ] - - if('SI_DATE' %in% colnames(tacsat.this.vessel)) - { - tacsat.this.vessel$SI_DATIM <- strptime( paste(tacsat.this.vessel$SI_DATE, tacsat.this.vessel$SI_TIME), - tz='GMT', "%e/%m/%Y %H:%M" ) - } else{ - if(!any(colnames(tacsat.this.vessel) %in% 'SI_DATIM')) stop('you need either to inform a SI_DATIM or a SI_DATE') - } - - if(is.null(general$speed)) general$speed <- "calculated" - if(general$speed=="calculated"){ - # add a apparent speed colunm (nautical miles per hour) - diff.time <- tacsat.this.vessel[-nrow(tacsat.this.vessel),"SI_DATIM"] - - tacsat.this.vessel[-1,"SI_DATIM"] - tacsat.this.vessel$diff.time.mins <- c(0, as.numeric(diff.time, units="mins")) - tacsat.this.vessel$speed <- signif( - c(0, distAB.f(A= tacsat.this.vessel[-nrow(tacsat.this.vessel),c("SI_LATI","SI_LONG")], - B= tacsat.this.vessel[-1,c("SI_LATI","SI_LONG")], .unit="nm") / (abs(tacsat.this.vessel[-1,]$diff.time.mins)/60)), - 3) - } - if(general$speed=="observed"){ - tacsat.this.vessel$speed <- tacsat.this.vessel$SI_SP # instantaneous speed - } - - # cleaning irrealistic points - tacsat.this.vessel$speed <- - replace(tacsat.this.vessel$speed, is.na(tacsat.this.vessel$speed), 0) - - - idx <- tacsat.this.vessel[tacsat.this.vessel$speed >= 30 | - is.infinite(tacsat.this.vessel$speed),"idx"] - tacsat <- tacsat[!tacsat$idx %in% idx,] # just remove! 
- tacsat.this.vessel <- tacsat.this.vessel[tacsat.this.vessel$speed < 30,] - - # check - if(is.null(levels(factor(tacsat.this.vessel$LE_GEAR)))) - stop('you need first to assign a gear LE_GEAR to each ping') - - - # A FOR-LOOP BY GEAR-------------------- - for (gr in levels(factor(tacsat.this.vessel$LE_GEAR))){ - - xxx <- tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr,] # input - - x <- as.numeric(as.character(sort(xxx$speed))) *100 # multiply by factor 100 because integer needed - hi <- hist(x, nclass=30,plot=FALSE) - - y <- c(1:length(sort(xxx$speed))) # sort in increasing order - y <- y[x>100 & x<1000] # just removing the 0, and the larger speeds we 100% know it is steaming - x <- x[x>100 & x<1000] - dati <- data.frame(x=x,y=y) - dati$x <- as.integer(dati$x) # integer needed - psi <- list(x= quantile(dati$x,probs=c(0.05,0.5)) ) - assign('dati', dati, envir=.GlobalEnv) # DEBUG segmented()...this function looks in the global env to get dati!! - # get good start guesses - hi$counts <- hi$counts[-1] - idx <- which(hi$counts==max(hi$counts))[1] - more.frequent.speed <- hi$mids[idx] # assumed to be for fishing - - a.flag <- FALSE - if(is.na(more.frequent.speed) | length(x)==0){ a.flag <- TRUE - } else { - while(more.frequent.speed > 700 || more.frequent.speed < 100){ - hi$counts <- hi$counts[-idx] - hi$mids <- hi$mids[-idx] - idx <- which(hi$counts==max(hi$counts))[1] - more.frequent.speed <- hi$mids[idx] - if(is.na(more.frequent.speed)){ - #=> for very few cases we have no speed found within 100-700 e.g. 
for the UNK gear - a.flag <- TRUE ; break - } - } - } - if(!a.flag){ - start.lower.bound <- ifelse(more.frequent.speed-200<= min(dati$x), - min(dati$x)+100, more.frequent.speed-200) - start.upper.bound <- ifelse(more.frequent.speed+200>= max(dati$x), - max(dati$x)-100, more.frequent.speed+200) - psi <- list(x= c(start.lower.bound, start.upper.bound) ) - psi$x[1] <- dati$x [dati$x<=psi$x[1]] [length(dati$x [dati$x<=psi$x[1]])] # get the bound value of the start guess from dati$x - psi$x[2] <- dati$x [dati$x>psi$x[2]] [1] - o <- 1 ; class(o) <- "try-error" ; count <- 0 ; bound1 <-NULL ; bound2 <- NULL; - while(class(o)=="try-error"){ - count <- count+1 - o <- try( - segmented(lm(y~x, data=dati) , seg.Z=~x , psi=psi, control= seg.control(display = FALSE, it.max=50, h=1)), # with 2 starting guesses - silent=TRUE) # the second breakpoint is quite uncertain and could lead to failure so... - if(!"try-error" %in% class(o)) break else psi <- list(x=c(psi$x[1],psi$x[2]-20)) # searching decreasing by 20 each time - if(count>10) {bound1 <- start.lower.bound; bound2 <- start.upper.bound ; cat(paste("failure of the segmented regression for",a.vesselid,gr,"\n...")); break} - } - if(is.null(bound1)) bound1 <- o$psi[order(o$psi[,"Est."])[1],"Est."] -20 # -20 hard to justify... 
- if(is.null(bound2)) bound2 <- o$psi[order(o$psi[,"Est."])[2],"Est."] +20 - - if(general$visual.check){ - X11() - par(mfrow=c(2,1)) - if(!"try-error" %in% class(o)){ - plot(dati$x/100, o$fitted.values, type="l", - ylab="cumulative distrib.", xlab="Knots", - main=paste("segmented regression - ",a.vesselid)) - } else{plot(0,0,type="n")} - #plot(hi) - #points(dati$x,dati$y) - tmp <- as.numeric(as.character(sort(xxx$speed))) - his <- hist(tmp, nclass=100, main=paste(general$speed,"speed between consecutive points"), - xlab="apparent speed [knots]", plot=TRUE) - if(!is.null(bound1)) abline(v=bound1/100,col=2) - if(!is.null(bound2)) abline(v=bound2/100,col=2) - if(!is.null(bound1)) text(bound1/100, median(his$counts), signif(bound1,3), col=2, cex=2) - if(!is.null(bound2)) text(bound2/100, median(his$counts), signif(bound2,3), col=2, cex=2) - # save the panel plot - savePlot(filename = file.path(general$output.path, - paste(unique(a.vesselid),"-detected_speed_span_for_feffort-", general$a.year,"-",gr, sep="")), - type = c("wmf"), device = dev.cur(), restoreConsole = TRUE) - dev.off() - } - - # so, - bound1 <- bound1 / 100 # transform back - bound2 <- bound2 / 100 # transform back - xxx$speed <- replace(xxx$speed, is.na(xxx$speed), 0) # debug 0/0 - - - - # maybe you want to only keep the upper bound - # and then set the lower one for a particular gear? - if(gr %in% gears.to.force){ - bound1 <- force.lower.bound - cat(paste(gr," force lower bound at", force.lower.bound, "knots\n")) - } - - # then, assign... 
- xxx[xxx$speed < bound1, "SI_STATE"] <- 2 # steaming - xxx[xxx$speed >= bound1 & xxx$speed < bound2, "SI_STATE"] <- 1 # fishing - xxx[xxx$speed >= bound2 , "SI_STATE"] <- 2 # steaming - tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr, "SI_STATE"] <- xxx$SI_STATE # output - tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr,"bound1"] <- bound1 - tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr,"bound2"] <- bound2 - cat(paste(gr," lower", general$speed,"speed bound:",round(bound1,1),"knots\n")) - cat(paste(gr," upper", general$speed,"speed bound:",round(bound2,1),"knots\n")) - } else{ - tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr, "SI_STATE"] <- 2 - #=> end a.flag: in case of very few records for this gear... - } - } # end gr - - - # clean up by removing no longer used columns - tacsat.this.vessel <- tacsat.this.vessel[, !colnames(tacsat.this.vessel) %in% - c('speed','diff.time.mins')] - - tacsat[tacsat$VE_REF==a.vesselid,] <- tacsat.this.vessel # output - } # end of a.vesselid - - - # plot for speed bound distribution - tacsat2 <- tacsat[tacsat$VE_REF %in% vessels & !duplicated(tacsat[,c("VE_REF","LE_GEAR")]),] - b1 <- tapply(tacsat2$bound1, tacsat2$LE_GEAR, mean, na.rm=TRUE) - b2 <- tapply(tacsat2$bound2, tacsat2$LE_GEAR, mean, na.rm=TRUE) - cat(paste("lower speed bound mean:",round(b1,1),"knots\n")) - cat(paste("upper speed bound mean:",round(b2,1),"knots\n")) - if(general$visual.check){ - X11() - op <- par(no.readonly = TRUE) - par(mfrow=c(1,2)) - boxplot(bound1 ~ LE_GEAR, data=tacsat2, ylim=c(0,8)) - boxplot(bound2 ~ LE_GEAR, data=tacsat2, ylim=c(0,8)) - par(op) - dev.off() - } - # save - write.table(tacsat2[,c("VE_REF","LE_GEAR","bound1","bound2")], append=TRUE, - file = file.path(general$output.path, - paste("speed_bounds_per_vessel_per_gear_", general$a.year,".txt", sep="")), - quote=FALSE, col.names=FALSE, row.names=FALSE) - - -return(tacsat[tacsat$VE_REF %in% vessels, !colnames(tacsat) %in% c('bound1','bound2')]) -} - +# do the distinction between 
fishing and non-fishing +# by automatic detection of the fishing peak +# adapted from library 'segmented' +# F. Bastardie + + + +#' do the distinction between fishing/non-fishing detecting the fishing speed +#' peak +#' +#' do the distinction between fishing/non-fishing based on (semi)automatic +#' detection of the fishing speed peak to account for the skipper/vessel +#' effect. This is done per vessel and independently for each gear of the +#' vessel. A segmented regression is performed on the cumulative speed +#' distribution to identify the speed breakpoints bounding the fishing peak +#' i.e. the first peak. The method returns tacsat (a subset if vessels +#' informed) with the SI_STATE informed. +#' +#' A (quick and dirty) vessel and gear-specific method has been suggested by +#' Bastardie et al. 2010 for dividing the fishing positions from the +#' non-fishing positions (or 'steaming') based on vessel speed profile. Data +#' from trips with observers on board recording vessel movement with a high +#' spatial resolution (e.g. a position recorded every minute) demonstrated that +#' the vessel speed frequency histogram is usually bimodal for commercial +#' trawlers, where the first low speed peak likely corresponds to fishing +#' activity, while the upper peak corresponds to faster movements, e.g. +#' steaming between fishing grounds and harbours. Low speeds can also +#' correspond, for instance, to a regulated speed leaving a harbour. +#' +#' Speed boundaries were accordingly determined by applying a segmented +#' regression to the cumulative distribution of vessel speeds ('calculated' or +#' 'observed', see the argument of the function) as a practical way to +#' automatically detect break points encapsulating the first peak of the speed +#' frequency histogram. 
The computation is preferably done on calculated speed +#' instead of instantaneous (observed) speeds to avoid possible +#' non-representative records of speed (discrete records usually with large +#' time-span between). Peak detection is done for each vessel and for each of +#' its gear types to account for individual skipper behaviour and gear-specific +#' constraints on fishing activity. The present methodology is however not +#' well suited for the particular gillnet and seine activities because no peaks +#' are found from observations of speed histograms. For these fishing +#' activities, only the upper boundary of the automatic peak detection should +#' be retained, assuming that fishing starts at a given value in speed e.g. 0.5 +#' knots. +#' +#' @param tacsat tacsat with LE_GEAR informed for each ping +#' @param general a list of general settings e.g. the output path for saving +#' the plot +#' @param ... vessels, a vector of vessel identifiers VE_REF that can be found +#' in tacsat. force.lower.bound might also be used to fix the lower speed at +#' some value for gillnet and seine gear types +#' @author Francois Bastardie +#' @seealso \code{mergeEflalo2Pings} +#' @references Bastardie et al. 2010. 
Fisheries Research +#' @examples +#' +#' +#' \dontrun{ +#' data(tacsat) +#' # fake here, but an informed LE_GEAR is required +#' tacsat$LE_GEAR <-"OTB" +#' tacsat <- segmentTacsatSpeed ( +#' tacsat=tacsat, +#' vessels="35", +#' force.lower.bound= 0.5, +#' gears.to.force= c('GNS','GND','GNC','GNF','GTR','GTN','GEN','GN','SDN','SSC'), +#' general= +#' list( +#' output.path=file.path('C:','output'), +#' visual.check=TRUE, speed="calculated" +#' ) +#' ) +#' } +#' +#' +#' @export segmentTacsatSpeed +segmentTacsatSpeed <- function(tacsat, + vessels= unique(tacsat$VE_REF), + force.lower.bound=0.5, + gears.to.force= c('GNS','GND','GNC','GNF','GTR','GTN','GEN','GN','SDN','SSC'), + general=list( + output.path=file.path('C:','output'), + visual.check=TRUE, + a.year=2009, + speed="calculated" + ), + ...){ + + + lstargs <- as.list( sys.call() ) # deprecated + + # checks + if(!'SI_STATE' %in% colnames(tacsat)) tacsat$SI_STATE <- NA + if(!'idx' %in% colnames(tacsat)) tacsat$idx <- 1:nrow(tacsat) + cat("if it still doesn't exist, 'results' folder is created in ",general$output.path,"\n") + dir.create(general$output.path, showWarnings = TRUE, recursive = TRUE, mode = "0777") + + # add + tacsat$bound1 <- NA + tacsat$bound2 <- NA + + + # utils--- + distAB.f <- function(A,B, .unit="km"){ + # return the dist in km or nautical miles. + # coord input: e.g. 
A[54,10] means [54 N, 10 E] + # formula = cos (gamma) = sin LatA sin LatB + cos LatA cos LatB cos delta + # if gamma in degree, knowing that 1 degree = 60 minutes = 60 nm, then gamma in degree * 60 = dist in nm + # we can also use the radius R of the earth: the length of the arc AB is given by R*gamma with gamma in radians + p = 180/pi; Rearth = 6378.388 # in km + nm <- 1852e-3 # 1852 m for 1 nm + res <- switch(.unit, + km = Rearth * acos(sin(A[,1]/p)*sin(B[,1]/p) + (cos(A[,1]/p) * cos(B[,1]/p)*cos(A[,2]/p - B[,2]/p))), + nm = Rearth * acos(sin(A[,1]/p)*sin(B[,1]/p) + (cos(A[,1]/p) * cos(B[,1]/p)*cos(A[,2]/p - B[,2]/p))) / nm + ) + names(res) <- NULL + return(res) + } + + # utils--- + # library(segmented) + # from library(segmented) # getS3method("segmented", "lm") + ## CAUTION: UNFORTUNATELY, THE FUNCTION NEED TO BE PLACED HERE BECAUSE A PROBLEM OF ENV + seg.control <- function (toll = 1e-04, it.max = 20, display = FALSE, last = TRUE, + maxit.glm = 25, h = 1){ + list(toll = toll, it.max = it.max, visual = display, last = last, + maxit.glm = maxit.glm, h = h) + } + segmented <- function (obj, seg.Z, psi, control = seg.control(), model.frame = TRUE, + ...){ + it.max <- old.it.max <- control$it.max + toll <- control$toll + visual <- control$visual + last <- control$last + h <- min(abs(control$h), 1) + if (h < 1) + it.max <- it.max + round(it.max/2) + objframe <- update(obj, model = TRUE, x = TRUE, y = TRUE) + y <- objframe$y + a <- model.matrix(seg.Z, data = eval(obj$call$data)) + a <- subset(a, select = colnames(a)[-1]) + n <- nrow(a) + Z <- list() + for (i in colnames(a)) Z[[length(Z) + 1]] <- a[, i] + name.Z <- names(Z) <- colnames(a) + if (length(Z) == 1 && is.vector(psi) && is.numeric(psi)) { + psi <- list(as.numeric(psi)) + names(psi) <- name.Z + } + if (!is.list(Z) || !is.list(psi) || is.null(names(Z)) || + is.null(names(psi))) + stop("Z and psi have to be *named* list") + nomiZpsi <- match(names(Z), names(psi)) + if (!identical(length(Z), length(psi)) || 
any(is.na(nomiZpsi))) + stop("Length or names of Z and psi do not match") + dd <- match(names(Z), names(psi)) + nome <- names(psi)[dd] + psi <- psi[nome] + a <- sapply(psi, length) + b <- rep(1:length(a), times = a) + Znew <- list() + for (i in 1:length(psi)) Znew[[length(Znew) + 1]] <- rep(Z[i], + a[i]) + Z <- matrix(unlist(Znew), nrow = n) + colnames(Z) <- rep(nome, a) + psi <- unlist(psi) + k <- ncol(Z) + PSI <- matrix(rep(psi, rep(n, k)), ncol = k) + nomiZ <- rep(nome, times = a) + ripetizioni <- as.numeric(unlist(sapply(table(nomiZ)[order(unique(nomiZ))], + function(xxx) { + 1:xxx + }))) + nomiU <- paste("U", ripetizioni, sep = "") + nomiU <- paste(nomiU, nomiZ, sep = ".") + KK <- new.env() + for (i in 1:ncol(objframe$model)) assign(names(objframe$model[i]), + objframe$model[[i]], envir = KK) + if (it.max == 0) { + U <- pmax((Z - PSI), 0) + colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") + nomiU <- paste("U", colnames(U), sep = "") + for (i in 1:ncol(U)) assign(nomiU[i], U[, i], envir = KK) + Fo <- update.formula(formula(obj), as.formula(paste(".~.+", + paste(nomiU, collapse = "+")))) + obj <- update(obj, formula = Fo, data = KK) + if (model.frame) + obj$model <- data.frame(as.list(KK)) + obj$psi <- psi + return(obj) + } + XREG <- model.matrix(obj) + o <- model.offset(objframe) + w <- model.weights(objframe) + if (is.null(w)) + w <- rep(1, n) + if (is.null(o)) + o <- rep(0, n) + initial <- psi + it <- 1 + epsilon <- 10 + obj0 <- obj + list.obj <- list(obj) + psi.values <- NULL + rangeZ <- apply(Z, 2, range) + H <- 1 + while (abs(epsilon) > toll) { + U <- pmax((Z - PSI), 0) + V <- ifelse((Z > PSI), -1, 0) + dev.old <- sum(obj$residuals^2) + + X <- cbind(XREG, U, V) + rownames(X) <- NULL + if (ncol(V) == 1) + colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c("U", "V") + else colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c(paste("U", + 1:k, sep = ""), paste("V", 1:k, sep = "")) + obj <- lm.wfit(x = X, y = y, w = w, offset = o) + dev.new <- sum(obj$residuals^2) + if 
(visual) { + if (it == 1) + cat(0, " ", formatC(dev.old, 3, format = "f"), + "", "(No breakpoint(s))", "\n") + spp <- if (it < 10) + "" + else NULL + cat(it, spp, "", formatC(dev.new, 3, format = "f"), + "\n") + } + epsilon <- (dev.new - dev.old)/dev.old + obj$epsilon <- epsilon + it <- it + 1 + obj$it <- it + class(obj) <- c("segmented", class(obj)) + list.obj[[length(list.obj) + ifelse(last == TRUE, 0, + 1)]] <- obj + if (k == 1) { + beta.c <- coef(obj)["U"] + gamma.c <- coef(obj)["V"] + } + else { + beta.c <- coef(obj)[paste("U", 1:k, sep = "")] + gamma.c <- coef(obj)[paste("V", 1:k, sep = "")] + } + if (it > it.max) + break + psi.values[[length(psi.values) + 1]] <- psi.old <- psi + if (it >= old.it.max && h < 1) + H <- h + psi <- psi.old + H * gamma.c/beta.c + PSI <- matrix(rep(psi, rep(nrow(Z), ncol(Z))), ncol = ncol(Z)) + a <- apply((Z < PSI), 2, all) + b <- apply((Z > PSI), 2, all) + if (sum(a + b) != 0 || is.na(sum(a + b))) + stop("(Some) estimated psi out of its range") + obj$psi <- psi + } + psi.values[[length(psi.values) + 1]] <- psi + id.warn <- FALSE + if (it > it.max) { + warning("max number of iterations attained", call. 
= FALSE) + id.warn <- TRUE + } + Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) + colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") + colnames(Vxb) <- paste(ripetizioni, nomiZ, sep = ".") + nomiU <- paste("U", colnames(U), sep = "") + nomiVxb <- paste("psi", colnames(Vxb), sep = "") + for (i in 1:ncol(U)) { + assign(nomiU[i], U[, i], envir = KK) + assign(nomiVxb[i], Vxb[, i], envir = KK) + } + nnomi <- c(nomiU, nomiVxb) + Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", + paste(nnomi, collapse = "+")))) + objF <- update(obj0, formula = Fo, data = KK) + Cov <- vcov(objF) + id <- match(paste("psi", colnames(Vxb), sep = ""), names(coef(objF))) + vv <- if (length(id) == 1) + Cov[id, id] + else diag(Cov[id, id]) + psi <- cbind(initial, psi, sqrt(vv)) + rownames(psi) <- colnames(Cov)[id] + colnames(psi) <- c("Initial", "Est.", "St.Err") + objF$rangeZ <- rangeZ + objF$psi.history <- psi.values + objF$psi <- psi + objF$it <- (it - 1) + objF$epsilon <- epsilon + objF$call <- match.call() + objF$nameUV <- list(U = paste("U", colnames(Vxb), sep = ""), + V = rownames(psi), Z = name.Z) + objF$id.warn <- id.warn + if (model.frame) + objF$mframe <- data.frame(as.list(KK)) + class(objF) <- c("segmented", class(obj0)) + list.obj[[length(list.obj) + 1]] <- objF + class(list.obj) <- "segmented" + if (last) + list.obj <- list.obj[[length(list.obj)]] + return(list.obj) + } + + + # hereafter, the core code... 
+ # A FOR-LOOP BY VESSEL-------------------- + for(a.vesselid in vessels){ + tacsat.this.vessel <- tacsat[tacsat$VE_REF %in% a.vesselid, ] + + if('SI_DATE' %in% colnames(tacsat.this.vessel)) + { + tacsat.this.vessel$SI_DATIM <- strptime( paste(tacsat.this.vessel$SI_DATE, tacsat.this.vessel$SI_TIME), + tz='GMT', "%e/%m/%Y %H:%M" ) + } else{ + if(!any(colnames(tacsat.this.vessel) %in% 'SI_DATIM')) stop('you need either to inform a SI_DATIM or a SI_DATE') + } + + if(is.null(general$speed)) general$speed <- "calculated" + if(general$speed=="calculated"){ + # add an apparent speed column (nautical miles per hour) + diff.time <- tacsat.this.vessel[-nrow(tacsat.this.vessel),"SI_DATIM"] - + tacsat.this.vessel[-1,"SI_DATIM"] + tacsat.this.vessel$diff.time.mins <- c(0, as.numeric(diff.time, units="mins")) + tacsat.this.vessel$speed <- signif( + c(0, distAB.f(A= tacsat.this.vessel[-nrow(tacsat.this.vessel),c("SI_LATI","SI_LONG")], + B= tacsat.this.vessel[-1,c("SI_LATI","SI_LONG")], .unit="nm") / (abs(tacsat.this.vessel[-1,]$diff.time.mins)/60)), + 3) + } + if(general$speed=="observed"){ + tacsat.this.vessel$speed <- tacsat.this.vessel$SI_SP # instantaneous speed + } + + # cleaning unrealistic points + tacsat.this.vessel$speed <- + replace(tacsat.this.vessel$speed, is.na(tacsat.this.vessel$speed), 0) + + + idx <- tacsat.this.vessel[tacsat.this.vessel$speed >= 30 | + is.infinite(tacsat.this.vessel$speed),"idx"] + tacsat <- tacsat[!tacsat$idx %in% idx,] # just remove! 
+ tacsat.this.vessel <- tacsat.this.vessel[tacsat.this.vessel$speed < 30,] + + # check + if(is.null(levels(factor(tacsat.this.vessel$LE_GEAR)))) + stop('you need first to assign a gear LE_GEAR to each ping') + + + # A FOR-LOOP BY GEAR-------------------- + for (gr in levels(factor(tacsat.this.vessel$LE_GEAR))){ + + xxx <- tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr,] # input + + x <- as.numeric(as.character(sort(xxx$speed))) *100 # multiply by factor 100 because integer needed + hi <- hist(x, nclass=30,plot=FALSE) + + y <- c(1:length(sort(xxx$speed))) # sort in increasing order + y <- y[x>100 & x<1000] # just removing the 0, and the larger speeds we 100% know it is steaming + x <- x[x>100 & x<1000] + dati <- data.frame(x=x,y=y) + dati$x <- as.integer(dati$x) # integer needed + psi <- list(x= quantile(dati$x,probs=c(0.05,0.5)) ) + assign('dati', dati, envir=.GlobalEnv) # DEBUG segmented()...this function looks in the global env to get dati!! + # get good start guesses + hi$counts <- hi$counts[-1] + idx <- which(hi$counts==max(hi$counts))[1] + more.frequent.speed <- hi$mids[idx] # assumed to be for fishing + + a.flag <- FALSE + if(is.na(more.frequent.speed) | length(x)==0){ a.flag <- TRUE + } else { + while(more.frequent.speed > 700 || more.frequent.speed < 100){ + hi$counts <- hi$counts[-idx] + hi$mids <- hi$mids[-idx] + idx <- which(hi$counts==max(hi$counts))[1] + more.frequent.speed <- hi$mids[idx] + if(is.na(more.frequent.speed)){ + #=> for very few cases we have no speed found within 100-700 e.g. 
for the UNK gear + a.flag <- TRUE ; break + } + } + } + if(!a.flag){ + start.lower.bound <- ifelse(more.frequent.speed-200<= min(dati$x), + min(dati$x)+100, more.frequent.speed-200) + start.upper.bound <- ifelse(more.frequent.speed+200>= max(dati$x), + max(dati$x)-100, more.frequent.speed+200) + psi <- list(x= c(start.lower.bound, start.upper.bound) ) + psi$x[1] <- dati$x [dati$x<=psi$x[1]] [length(dati$x [dati$x<=psi$x[1]])] # get the bound value of the start guess from dati$x + psi$x[2] <- dati$x [dati$x>psi$x[2]] [1] + o <- 1 ; class(o) <- "try-error" ; count <- 0 ; bound1 <-NULL ; bound2 <- NULL; + while(class(o)=="try-error"){ + count <- count+1 + o <- try( + segmented(lm(y~x, data=dati) , seg.Z=~x , psi=psi, control= seg.control(display = FALSE, it.max=50, h=1)), # with 2 starting guesses + silent=TRUE) # the second breakpoint is quite uncertain and could lead to failure so... + if(!"try-error" %in% class(o)) break else psi <- list(x=c(psi$x[1],psi$x[2]-20)) # searching decreasing by 20 each time + if(count>10) {bound1 <- start.lower.bound; bound2 <- start.upper.bound ; cat(paste("failure of the segmented regression for",a.vesselid,gr,"\n...")); break} + } + if(is.null(bound1)) bound1 <- o$psi[order(o$psi[,"Est."])[1],"Est."] -20 # -20 hard to justify... 
+ if(is.null(bound2)) bound2 <- o$psi[order(o$psi[,"Est."])[2],"Est."] +20 + + if(general$visual.check){ + X11() + par(mfrow=c(2,1)) + if(!"try-error" %in% class(o)){ + plot(dati$x/100, o$fitted.values, type="l", + ylab="cumulative distrib.", xlab="Knots", + main=paste("segmented regression - ",a.vesselid)) + } else{plot(0,0,type="n")} + #plot(hi) + #points(dati$x,dati$y) + tmp <- as.numeric(as.character(sort(xxx$speed))) + his <- hist(tmp, nclass=100, main=paste(general$speed,"speed between consecutive points"), + xlab="apparent speed [knots]", plot=TRUE) + if(!is.null(bound1)) abline(v=bound1/100,col=2) + if(!is.null(bound2)) abline(v=bound2/100,col=2) + if(!is.null(bound1)) text(bound1/100, median(his$counts), signif(bound1,3), col=2, cex=2) + if(!is.null(bound2)) text(bound2/100, median(his$counts), signif(bound2,3), col=2, cex=2) + # save the panel plot + savePlot(filename = file.path(general$output.path, + paste(unique(a.vesselid),"-detected_speed_span_for_feffort-", general$a.year,"-",gr, sep="")), + type = c("wmf"), device = dev.cur(), restoreConsole = TRUE) + dev.off() + } + + # so, + bound1 <- bound1 / 100 # transform back + bound2 <- bound2 / 100 # transform back + xxx$speed <- replace(xxx$speed, is.na(xxx$speed), 0) # debug 0/0 + + + + # maybe you want to only keep the upper bound + # and then set the lower one for a particular gear? + if(gr %in% gears.to.force){ + bound1 <- force.lower.bound + cat(paste(gr," force lower bound at", force.lower.bound, "knots\n")) + } + + # then, assign... 
+ xxx[xxx$speed < bound1, "SI_STATE"] <- 2 # steaming + xxx[xxx$speed >= bound1 & xxx$speed < bound2, "SI_STATE"] <- 1 # fishing + xxx[xxx$speed >= bound2 , "SI_STATE"] <- 2 # steaming + tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr, "SI_STATE"] <- xxx$SI_STATE # output + tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr,"bound1"] <- bound1 + tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr,"bound2"] <- bound2 + cat(paste(gr," lower", general$speed,"speed bound:",round(bound1,1),"knots\n")) + cat(paste(gr," upper", general$speed,"speed bound:",round(bound2,1),"knots\n")) + } else{ + tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr, "SI_STATE"] <- 2 + #=> end a.flag: in case of very few records for this gear... + } + } # end gr + + + # clean up by removing no longer used columns + tacsat.this.vessel <- tacsat.this.vessel[, !colnames(tacsat.this.vessel) %in% + c('speed','diff.time.mins')] + + tacsat[tacsat$VE_REF==a.vesselid,] <- tacsat.this.vessel # output + } # end of a.vesselid + + + # plot for speed bound distribution + tacsat2 <- tacsat[tacsat$VE_REF %in% vessels & !duplicated(tacsat[,c("VE_REF","LE_GEAR")]),] + b1 <- tapply(tacsat2$bound1, tacsat2$LE_GEAR, mean, na.rm=TRUE) + b2 <- tapply(tacsat2$bound2, tacsat2$LE_GEAR, mean, na.rm=TRUE) + cat(paste("lower speed bound mean:",round(b1,1),"knots\n")) + cat(paste("upper speed bound mean:",round(b2,1),"knots\n")) + if(general$visual.check){ + X11() + op <- par(no.readonly = TRUE) + par(mfrow=c(1,2)) + boxplot(bound1 ~ LE_GEAR, data=tacsat2, ylim=c(0,8)) + boxplot(bound2 ~ LE_GEAR, data=tacsat2, ylim=c(0,8)) + par(op) + dev.off() + } + # save + write.table(tacsat2[,c("VE_REF","LE_GEAR","bound1","bound2")], append=TRUE, + file = file.path(general$output.path, + paste("speed_bounds_per_vessel_per_gear_", general$a.year,".txt", sep="")), + quote=FALSE, col.names=FALSE, row.names=FALSE) + + +return(tacsat[tacsat$VE_REF %in% vessels, !colnames(tacsat) %in% c('bound1','bound2')]) +} + diff --git 
a/vmstools/R/segmentedTacsatSpeed.r b/vmstools/R/segmentedTacsatSpeed.r index 6ebde2e..aaa9312 100644 --- a/vmstools/R/segmentedTacsatSpeed.r +++ b/vmstools/R/segmentedTacsatSpeed.r @@ -1,141 +1,192 @@ -segmentedTacsatSpeed <- function(tacsat,units="year",analyse.by="VE_REF",speed="calculated",logfit=FALSE,CI=0.95,saveDir=tempdir(),forceLowerBound=NULL){ - - require(segmented) - tacsat$idxFun <- 1:nrow(tacsat) - - #Regular checks - if(!'SI_STATE' %in% colnames(tacsat)) tacsat$SI_STATE <- NA - if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - if(analyse.by == "LE_GEAR"){ if(!"LE_GEAR" %in% colnames(tacsat)) stop("Provide gear type (as column 'LE_GEAR' and if unknown, provide it as 'MIS'")} - if(!analyse.by %in% c("LE_GEAR","VE_REF","VE_REF+LE_GEAR")) warning("Analysing by unknown column variable, please check!") - if(analyse.by == "VE_REF+LE_GEAR"){ - if(!"VE_REF" %in% colnames(tacsat) | !"LE_GEAR" %in% colnames(tacsat)) stop("VE_REF and LE_GEAR not both in tacsat available") - tacsat$VE_REF_LE_GEAR <- paste(tacsat$VE_REF,tacsat$LE_GEAR) - analyse.by <- "VE_REF_LE_GEAR" - } - - tacsatOrig <- tacsat - if(analyse.by %in% colnames(tacsat)){ - if(units == "all"){ yrs <- 0; mths <- 0; wks <- 0} - if(units == "year"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 0; wks <- 0} - if(units == "month"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- sort(unique(month(tacsat$SI_DATIM))); wks <- 0} - if(units == "week"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); wks <- sort(unique(week(tacsat$SI_DATIM))); mths <- 0} - } else { stop("analyse.by statement not found as a column in the specified tacsat dataset")} - storeScheme <- expand.grid(years=yrs,months=mths,weeks=wks,analyse.by=unique(tacsat[,analyse.by])) - # Add upper and lower boundaries to storeScheme - storeScheme <- cbind(storeScheme,data.frame(lower=rep(0,nrow(storeScheme)), - 
upper=rep(0,nrow(storeScheme)))) - storeScheme$analyse.by <- ac(storeScheme$analyse.by) - storeScheme$success <- 0 - for(iRun in 1:nrow(storeScheme)){ - yr <- storeScheme[iRun,"years"] - mth <- storeScheme[iRun,"months"] - wk <- storeScheme[iRun,"weeks"] - aby <- storeScheme[iRun,"analyse.by"] - if(analyse.by == "VE_REF"){ - if(nrow(storeScheme)==1){ sTacsat <- tacsat - } else { - if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & VE_REF == aby) - if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk & VE_REF == aby) - if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth & VE_REF == aby) - } - } - if(analyse.by == "LE_GEAR"){ - if(nrow(storeScheme)==1){ sTacsat <- tacsat - } else { - if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & LE_GEAR == aby) - if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk & LE_GEAR == aby) - if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth & LE_GEAR == aby) - } - } - if(analyse.by == "VE_REF_LE_GEAR"){ - if(nrow(storeScheme)==1){ sTacsat <- tacsat - } else { - if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & VE_REF_LE_GEAR == aby ) - if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk & VE_REF_LE_GEAR == aby) - if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth & VE_REF_LE_GEAR == aby) - } - } - - #Check if there is any data available - if(nrow(sTacsat)>0){ - - #Check if you want to run with calculated or instantaneous speed - if(speed == "calculated"){ - sTacsat <- sortTacsat(sTacsat) - sTacsat$SI_SP_ORIG <- sTacsat$SI_SP - if("FT_REF" %in% colnames(tacsat)){ 
sTacsat$SI_SP <- calculateSpeed(sTacsat,level="trip", weight=c(0.5,0.5), fill.na=TRUE)$SI_SPCA - } else { sTacsat$SI_SP <- calculateSpeed(sTacsat,level="vessel", weight=c(0.5,0.5), fill.na=TRUE)$SI_SPCA} - } - #Remove records where SI_SP is NA - sTacsat <- sTacsat[which(is.na(sTacsat$SI_SP)==F & sTacsat$SI_SP > 0 & sTacsat$SI_SP <= 10),] - - - minRows <- 20 #Based on a scan of sub tacsat datasets, 5% percentile was approx 20 - - #Exception exercise: If number of rows in sTacsat is really low, return NA - if(nrow(sTacsat) <= minRows){ - storeScheme[iRun,"lower"] <- NA - storeScheme[iRun,"upper"] <- NA - } - - #Regular case: If number of rows is enough to fit segmented regression - if(nrow(sTacsat) > minRows){ - - #Define starting values for segmented regression - hi <- hist(sTacsat$SI_SP,breaks=diff(c(floor( range(sTacsat$SI_SP[is.finite(sTacsat$SI_SP)],na.rm=TRUE)[1]), - ceiling(range(sTacsat$SI_SP[is.finite(sTacsat$SI_SP)],na.rm=TRUE)[2]))),plot=FALSE) - acc <- diff(diff(cumsum(hi$counts))) - idx <- rev(sort(abs(diff(diff(cumsum(hi$counts))))))[1:2] - cnts<- which(abs(acc) %in% idx)+2 #Taking twice diff, so add 2 to get back to counts - psi <- list(x=range(hi$breaks[cnts])) #First guess on breakpoints - psiOrig <- psi - dat <- data.frame(x=sort(sTacsat$SI_SP[sTacsat$SI_SP>0]), - y=1:length(sTacsat$SI_SP[sTacsat$SI_SP>0])) - if(logfit==TRUE) dat <- data.frame(x= an(rep(names(table(sTacsat$SI_SP[sTacsat$SI_SP>0])), - ceiling(log(table(sTacsat$SI_SP[sTacsat$SI_SP>0]))))), - y=1:length(rep(names(table(sTacsat$SI_SP[sTacsat$SI_SP>0])), - ceiling(log(table(sTacsat$SI_SP[sTacsat$SI_SP>0])))))) - o <- 1 ; class(o) <- "try-error" ; count <- 0 ; bound1 <-NULL ; bound2 <- NULL; - - #Fit the model - while(class(o)=="try-error"){ - count <- count+1 - o <- try( - segmented(lm(y~x, data=dat) , seg.Z=~x , psi=psi, control= seg.control(display = FALSE, it.max=50, h=1)), # with 2 starting guesses - silent=TRUE) # the second breakpoint is quite uncertain and could lead to failure 
so... - if(!"try-error" %in% class(o)) break else psi <- list(x=c(sort(runif(2,min=range(sTacsat$SI_SP,na.rm=TRUE)[1],max=range(sTacsat$SI_SP,na.rm=TRUE)[2])))) # searching decreasing by 1 each time - if(count>20) {bound1 <- psiOrig$x[1]; bound2 <- psiOrig$x[2] ; cat("failure of the segmented regression for",paste(c("year","month","week","analyse.by"),storeScheme[iRun,1:4]),"\n"); break} - } - #Calculate the bounds and whether the fit has been successful or not - if(is.null(bound1)==T & is.null(bound2)==TRUE){ - bound1 <- max(range(sTacsat$SI_SP)[1],min(confint(o,level=CI)[,grep("low",colnames(confint(o)))])) - bound2 <- min(range(sTacsat$SI_SP)[2],max(confint(o,level=CI)[,grep("up", colnames(confint(o)))])) - if(class(o)[1] != "try-error") storeScheme[iRun,"success"] <- 1 - } - - #Save the bounds - if(is.null(forceLowerBound)==FALSE) - bound1 <- forceLowerBound - if(bound2 < bound1) - bound2 <- bound1 - storeScheme[iRun,"lower"] <- bound1 - storeScheme[iRun,"upper"] <- bound2 - - tacsatOrig$SI_STATE[sTacsat$idxFun[which(sTacsat$SI_SP >= storeScheme[iRun,"lower"] & sTacsat$SI_SP <= storeScheme[iRun,"upper"])]] <- "f" #Fishing - tacsatOrig$SI_STATE[sTacsat$idxFun[which(sTacsat$SI_SP < storeScheme[iRun,"lower"] | sTacsat$SI_SP > storeScheme[iRun,"upper"])]] <- "nf" #Steaming / in harbour - } - } - } - #Write the results to file and display the success rates - write.csv(storeScheme,file=file.path(saveDir,"storeScheme.csv")) - cat("Successful segmented regression fits",length(which(storeScheme$success==1)),"\n", - "versus unsuccessful fits",length(which(storeScheme$success == 0)),"\n\n", - "Check ",file.path(saveDir,"storeScheme.csv"),"for details \n\n") - - cat("Note: fishing = f, no fishing = nf\n") -return(tacsatOrig[,-grep("idxFun",colnames(tacsatOrig))])} - -#res <- segmentedTacsatSpeed(tacsat,units="year",analyse.by="VE_REF",speed="calculated",logfit=FALSE,CI=0.95) - - +#' Define activity based on segmented regression of speed profile +#' +#' Given the speed 
profile by gear or vessel in a user defined time frame a +#' segmented regression analyses will be performed to indicate where the +#' fishing speeds are located. The segmented regression takes place on the +#' cumulative speed profile. +#' +#' To fit a speed profile at least 20 VMS pings must exist. The function highly +#' depends on accurate starting points. After 20 random tries the fitting +#' procedure exits and returns a '0' success. +#' +#' @param tacsat A tacsat dataset (with optional column "LE_GEAR" when matched +#' to eflalo) +#' @param units Analyse by: "year", "month" and "week". "month" and "week" +#' cannot be used at same time. +#' @param analyse.by Analyse tacsat by gear ("LE_GEAR"), vessel ("VE_REF") or a +#' combination of gear and vessel ("VE_REF+LE_GEAR"). +#' @param speed Define if speed profile used must be taken as given in the +#' tacsat file (speed = "instantaneous") or if internally speed must be +#' calculated (speed = "calculated"). Default is "calculated" +#' @param logfit Logical. Define whether the speed profile frequencies must be +#' log-transformed. Default is FALSE. +#' @param CI Define confidence interval for calculated segmented break points. +#' Default is 0.95. +#' @param saveDir Directory to save overview and success of fit. Default = +#' tempdir(). +#' @param forceLowerBound Fix the lower breakpoint value at the forceLowerBound +#' given. If not specified, lower breakpoint is estimated. +#' @return SI_STATE = nf for no-fishing and SI_STATE = f for fishing +#' @author Niels T. Hintzen, Francois Bastardie +#' @seealso \code{\link{activityTacsatAnalyse}}, \code{\link{activityTacsat}} +#' @references Bastardie et al. 
2010 +#' @examples +#' +#' data(tacsat) +#' tacsat <- tacsat[1:20000,] +#' +#' #-Fit based on vessel and calculated speed +#' newTacsat <- segmentedTacsatSpeed(tacsat,units="year",analyse.by="VE_REF", +#' speed="calculated",logfit=FALSE,CI=0.95) +#' +#' data(eflalo) +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)] +#' +#' #-Fit based on gear and instantaneous speed +#' newTacsat <- segmentedTacsatSpeed(tacsatp,units="year",analyse.by="LE_GEAR", +#' speed="instantaneous",logfit=FALSE,CI=0.95) +#' +#' +#' @export segmentedTacsatSpeed +segmentedTacsatSpeed <- function(tacsat,units="year",analyse.by="VE_REF",speed="calculated",logfit=FALSE,CI=0.95,saveDir=tempdir(),forceLowerBound=NULL){ + + require(segmented) + tacsat$idxFun <- 1:nrow(tacsat) + + #Regular checks + if(!'SI_STATE' %in% colnames(tacsat)) tacsat$SI_STATE <- NA + if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + if(analyse.by == "LE_GEAR"){ if(!"LE_GEAR" %in% colnames(tacsat)) stop("Provide gear type (as column 'LE_GEAR' and if unknown, provide it as 'MIS'")} + if(!analyse.by %in% c("LE_GEAR","VE_REF","VE_REF+LE_GEAR")) warning("Analysing by unknown column variable, please check!") + if(analyse.by == "VE_REF+LE_GEAR"){ + if(!"VE_REF" %in% colnames(tacsat) | !"LE_GEAR" %in% colnames(tacsat)) stop("VE_REF and LE_GEAR not both in tacsat available") + tacsat$VE_REF_LE_GEAR <- paste(tacsat$VE_REF,tacsat$LE_GEAR) + analyse.by <- "VE_REF_LE_GEAR" + } + + tacsatOrig <- tacsat + if(analyse.by %in% colnames(tacsat)){ + if(units == "all"){ yrs <- 0; mths <- 0; wks <- 0} + if(units == "year"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 0; wks <- 0} + if(units == "month"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- sort(unique(month(tacsat$SI_DATIM))); wks <- 0} + if(units == "week"){ yrs <- 
sort(unique(format(tacsat$SI_DATIM,"%Y"))); wks <- sort(unique(week(tacsat$SI_DATIM))); mths <- 0} + } else { stop("analyse.by statement not found as a column in the specified tacsat dataset")} + storeScheme <- expand.grid(years=yrs,months=mths,weeks=wks,analyse.by=unique(tacsat[,analyse.by])) + # Add upper and lower boundaries to storeScheme + storeScheme <- cbind(storeScheme,data.frame(lower=rep(0,nrow(storeScheme)), + upper=rep(0,nrow(storeScheme)))) + storeScheme$analyse.by <- ac(storeScheme$analyse.by) + storeScheme$success <- 0 + for(iRun in 1:nrow(storeScheme)){ + yr <- storeScheme[iRun,"years"] + mth <- storeScheme[iRun,"months"] + wk <- storeScheme[iRun,"weeks"] + aby <- storeScheme[iRun,"analyse.by"] + if(analyse.by == "VE_REF"){ + if(nrow(storeScheme)==1){ sTacsat <- tacsat + } else { + if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & VE_REF == aby) + if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk & VE_REF == aby) + if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth & VE_REF == aby) + } + } + if(analyse.by == "LE_GEAR"){ + if(nrow(storeScheme)==1){ sTacsat <- tacsat + } else { + if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & LE_GEAR == aby) + if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk & LE_GEAR == aby) + if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth & LE_GEAR == aby) + } + } + if(analyse.by == "VE_REF_LE_GEAR"){ + if(nrow(storeScheme)==1){ sTacsat <- tacsat + } else { + if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & VE_REF_LE_GEAR == aby ) + if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk & VE_REF_LE_GEAR == aby) 
+ if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth & VE_REF_LE_GEAR == aby) + } + } + + #Check if there is any data available + if(nrow(sTacsat)>0){ + + #Check if you want to run with calculated or instantaneous speed + if(speed == "calculated"){ + sTacsat <- sortTacsat(sTacsat) + sTacsat$SI_SP_ORIG <- sTacsat$SI_SP + if("FT_REF" %in% colnames(tacsat)){ sTacsat$SI_SP <- calculateSpeed(sTacsat,level="trip", weight=c(0.5,0.5), fill.na=TRUE)$SI_SPCA + } else { sTacsat$SI_SP <- calculateSpeed(sTacsat,level="vessel", weight=c(0.5,0.5), fill.na=TRUE)$SI_SPCA} + } + #Remove records where SI_SP is NA + sTacsat <- sTacsat[which(is.na(sTacsat$SI_SP)==F & sTacsat$SI_SP > 0 & sTacsat$SI_SP <= 10),] + + + minRows <- 20 #Based on a scan of sub tacsat datasets, 5% percentile was approx 20 + + #Exception exercise: If number of rows in sTacsat is really low, return NA + if(nrow(sTacsat) <= minRows){ + storeScheme[iRun,"lower"] <- NA + storeScheme[iRun,"upper"] <- NA + } + + #Regular case: If number of rows is enough to fit segmented regression + if(nrow(sTacsat) > minRows){ + + #Define starting values for segmented regression + hi <- hist(sTacsat$SI_SP,breaks=diff(c(floor( range(sTacsat$SI_SP[is.finite(sTacsat$SI_SP)],na.rm=TRUE)[1]), + ceiling(range(sTacsat$SI_SP[is.finite(sTacsat$SI_SP)],na.rm=TRUE)[2]))),plot=FALSE) + acc <- diff(diff(cumsum(hi$counts))) + idx <- rev(sort(abs(diff(diff(cumsum(hi$counts))))))[1:2] + cnts<- which(abs(acc) %in% idx)+2 #Taking twice diff, so add 2 to get back to counts + psi <- list(x=range(hi$breaks[cnts])) #First guess on breakpoints + psiOrig <- psi + dat <- data.frame(x=sort(sTacsat$SI_SP[sTacsat$SI_SP>0]), + y=1:length(sTacsat$SI_SP[sTacsat$SI_SP>0])) + if(logfit==TRUE) dat <- data.frame(x= an(rep(names(table(sTacsat$SI_SP[sTacsat$SI_SP>0])), + ceiling(log(table(sTacsat$SI_SP[sTacsat$SI_SP>0]))))), + y=1:length(rep(names(table(sTacsat$SI_SP[sTacsat$SI_SP>0])), + 
ceiling(log(table(sTacsat$SI_SP[sTacsat$SI_SP>0])))))) + o <- 1 ; class(o) <- "try-error" ; count <- 0 ; bound1 <-NULL ; bound2 <- NULL; + + #Fit the model + while(class(o)=="try-error"){ + count <- count+1 + o <- try( + segmented(lm(y~x, data=dat) , seg.Z=~x , psi=psi, control= seg.control(display = FALSE, it.max=50, h=1)), # with 2 starting guesses + silent=TRUE) # the second breakpoint is quite uncertain and could lead to failure so... + if(!"try-error" %in% class(o)) break else psi <- list(x=c(sort(runif(2,min=range(sTacsat$SI_SP,na.rm=TRUE)[1],max=range(sTacsat$SI_SP,na.rm=TRUE)[2])))) # searching decreasing by 1 each time + if(count>20) {bound1 <- psiOrig$x[1]; bound2 <- psiOrig$x[2] ; cat("failure of the segmented regression for",paste(c("year","month","week","analyse.by"),storeScheme[iRun,1:4]),"\n"); break} + } + #Calculate the bounds and whether the fit has been successful or not + if(is.null(bound1)==T & is.null(bound2)==TRUE){ + bound1 <- max(range(sTacsat$SI_SP)[1],min(confint(o,level=CI)[,grep("low",colnames(confint(o)))])) + bound2 <- min(range(sTacsat$SI_SP)[2],max(confint(o,level=CI)[,grep("up", colnames(confint(o)))])) + if(class(o)[1] != "try-error") storeScheme[iRun,"success"] <- 1 + } + + #Save the bounds + if(is.null(forceLowerBound)==FALSE) + bound1 <- forceLowerBound + if(bound2 < bound1) + bound2 <- bound1 + storeScheme[iRun,"lower"] <- bound1 + storeScheme[iRun,"upper"] <- bound2 + + tacsatOrig$SI_STATE[sTacsat$idxFun[which(sTacsat$SI_SP >= storeScheme[iRun,"lower"] & sTacsat$SI_SP <= storeScheme[iRun,"upper"])]] <- "f" #Fishing + tacsatOrig$SI_STATE[sTacsat$idxFun[which(sTacsat$SI_SP < storeScheme[iRun,"lower"] | sTacsat$SI_SP > storeScheme[iRun,"upper"])]] <- "nf" #Steaming / in harbour + } + } + } + #Write the results to file and display the success rates + write.csv(storeScheme,file=file.path(saveDir,"storeScheme.csv")) + cat("Successful segmented regression fits",length(which(storeScheme$success==1)),"\n", + "versus unsuccessful 
fits",length(which(storeScheme$success == 0)),"\n\n", + "Check ",file.path(saveDir,"storeScheme.csv"),"for details \n\n") + + cat("Note: fishing = f, no fishing = nf\n") +return(tacsatOrig[,-grep("idxFun",colnames(tacsatOrig))])} + +#res <- segmentedTacsatSpeed(tacsat,units="year",analyse.by="VE_REF",speed="calculated",logfit=FALSE,CI=0.95) + + diff --git a/vmstools/R/selectMainSpecies.r b/vmstools/R/selectMainSpecies.r index fcf205c..278717e 100644 --- a/vmstools/R/selectMainSpecies.r +++ b/vmstools/R/selectMainSpecies.r @@ -1,316 +1,426 @@ -################################################################################ -# EXPLORE THE SELECTED SPECIES DEPENDING ON THE METHOD (HAC, TOTALE, LOGEVENT) # -# AND THE THRESHOLD CHOSEN # -################################################################################ - -selectMainSpecies=function(dat,analysisName="",RunHAC=TRUE,DiagFlag=FALSE){ - - require(FactoMineR) # function PCA - require(cluster) # functions pam & clara - require(SOAR) # function Store - require(amap) # function hcluster - require(MASS) # function lda - require(mda) # function fda - - p=ncol(dat) # Number of species - n=nrow(dat) - - # Transform quantities to proportions of total quantity caught by logevent - print("calculating proportions...") - - propdat=transformation_proportion(dat[,2:p]) - nameSpecies=colnames(propdat) - nbAllSpecies=ncol(propdat) - - t1=Sys.time() - - if (RunHAC == TRUE) { - - # METHOD : 'HAC' - - print("######## SPECIES EXPLORATION METHOD 1: HAC ########") - # Transposing data - table_var=table_variables(propdat) - - # HAC - print("cluster...") - cah_var=hcluster(table_var, method="euclidean", link="ward") - - Store(objects()) - gc(reset=TRUE) - - # Select the number of clusters by scree-test - inerties.vector=cah_var$height[order(cah_var$height,decreasing=TRUE)] - nb.finalclusters=which(scree(inerties.vector)[,"epsilon"]<0)[1] - - if(!is.na(nb.finalclusters)){ - # Dendogram cutting at the selected level - 
cah_cluster_var=cutree(cah_var,k=nb.finalclusters) - - png(paste(analysisName,"HAC_Dendogram_Step1.png",sep="_"), width = 1200, height = 800) - plot(cah_var,labels=FALSE,hang=-1,ann=FALSE) - title(main="HAC dendogram",xlab="Species",ylab="Height") - rect.hclust(cah_var, k=nb.finalclusters) - dev.off() - - temp=select_species(dat[,2:p],cah_cluster_var) - namesResidualSpecies=nameSpecies[which(cah_cluster_var==temp[[2]])] #list of residual species - - fait=FALSE - nb_cut=1 - while ((fait == FALSE) && (nb_cut < (p-nb.finalclusters-2))) { - # cutting below - print(paste("----------- nb_cut =",nb_cut)) - cah_cluster_var_step=cutree(cah_var,k=(nb.finalclusters+nb_cut)) - # testing residual species - print(paste("------------- Residual species cluster(s) ",unique(cah_cluster_var_step[namesResidualSpecies]))) - if (length(unique(cah_cluster_var_step[namesResidualSpecies]))==1) { - print(paste("------------- No residual cut -----")) - nb_cut = nb_cut+1 # cutting below - }else{ - print("------------- Residual cut -----") - nbSpeciesClusters=table(cah_cluster_var_step[namesResidualSpecies]) - # testing if a species is alone in a group - if (sort(nbSpeciesClusters)[1]>1) { # if not alone - print("------- I stop and have a beer ------") - fait = TRUE # then I stop - }else{ - print("------ Updating residual species -----") - nb_cut = nb_cut+1; # if alone then cutting below and updating nameResidualSpecies to start again - numGroupSpeciesAlone = as.numeric(names(sort(nbSpeciesClusters)[1])) - namesSpeciesAlone = names(cah_cluster_var_step)[which(cah_cluster_var_step==numGroupSpeciesAlone)] - namesResidualSpecies = namesResidualSpecies[ - which(namesResidualSpecies==namesSpeciesAlone)] - print(paste("---- Adding new species ---",namesSpeciesAlone)) - } - } - } # end of while - - - # If all species are selected step by step, the final k is the initial cut (nb.finalclusters) - if((nb.finalclusters+nb_cut)>=(p-2)){ - kFinal=nb.finalclusters - cah_cluster_var=cutree(cah_var,k=kFinal) 
- temp=select_species(dat[,2:p],cah_cluster_var) - namesResidualSpecies=nameSpecies[which(cah_cluster_var==temp[[2]])] #list of residual species - } - - - # Dendogram of the first cut in the residual species cluster - png(paste(analysisName,"HAC_Dendogram_Step1_ResidualSpecies.png",sep="_"), width = 1200, height = 800) - plot(cah_var,labels=FALSE,hang=-1,ann=FALSE) - title(main="HAC dendogram - Step",xlab="Species",ylab="Height") - if((nb.finalclusters+nb_cut)>=(p-2)){ - rect.hclust(cah_var, k=kFinal) - }else{ - rect.hclust(cah_var, k=(nb.finalclusters+nb_cut)) - } - dev.off() - - # Selection of main species - nomespsel=setdiff(nameSpecies,namesResidualSpecies) - cat("main species : ",nomespsel,"\n") - - # Return the dataset retaining only the main species - nbMainSpeciesHAC=length(nomespsel) - namesMainSpeciesHAC=nomespsel - propNbMainSpeciesHAC=nbMainSpeciesHAC/nbAllSpecies*100 - - if(DiagFlag==TRUE) { - datSpeciesWithoutProp=building_tab_pca(dat[,2:p],nomespsel) - pourcentCatchMainSpeciesHAC=apply(datSpeciesWithoutProp,1,sum)/apply(dat[,2:p],1,sum)*100 - medianPourcentCatchMainSpeciesHAC=median(pourcentCatchMainSpeciesHAC) - } - - Store(objects()) - gc(reset=TRUE) - - } else { - namesMainSpeciesHAC=NA; nbMainSpeciesHAC=as.numeric(NA); medianPourcentCatchMainSpeciesHAC=as.numeric(NA); propNbMainSpeciesHAC=NA - } - - print(Sys.time()-t1) - - }else{ namesMainSpeciesHAC=NA; nbMainSpeciesHAC=as.numeric(NA); medianPourcentCatchMainSpeciesHAC=as.numeric(NA); propNbMainSpeciesHAC=NA } - - - # METHOD : 'TOTALE' - - print("######## SPECIES EXPLORATION METHOD 2: 'TOTAL' ########") - - # Total quantity caught by species - sumcol=numeric(length=p-1) - for(i in 2:p){ - sumcol[i-1]=sum(dat[,i], na.rm=TRUE) - } - names(sumcol)=names(dat)[-1] - - # Percent of each species in the total catch - propesp=sumcol/sum(sumcol,na.rm=TRUE)*100 - # Columns number of each species by decreasing order of capture - numesp=order(propesp,decreasing=TRUE) - # Percent of each species in the total 
catch by cumulated decreasing order - propesp=cumsum(propesp[order(propesp,decreasing=TRUE)]) - - # We are taking all species until having at least seuil% of total catch - nbMainSpeciesTotal=numeric() - medianPourcentCatchMainSpeciesTotal=numeric() - - for(seuil in seq(5,100,5)){ - cat("seuil:",seuil,"\n") - pourcent=which(propesp<=seuil) - # We are taking the name of selected species - espsel=numesp[1:(length(pourcent)+1)] - nomespsel=nameSpecies[espsel] - nbMainSpeciesTotal[seuil/5]=length(nomespsel) - - if(DiagFlag==TRUE) { - # We are building the table with main species and aggregated other species - datSpeciesWithoutProp=building_tab_pca(dat[,2:p],nomespsel) - if(length(nomespsel)==1){ - vectorNul=rep(0,n) - datSpeciesWithoutProp=cbind(datSpeciesWithoutProp,vectorNul) - } - pourcentCatchMainSpeciesTotal=apply(datSpeciesWithoutProp,1,sum, na.rm=TRUE)/apply(dat[,2:p],1,sum, na.rm=TRUE)*100 - medianPourcentCatchMainSpeciesTotal[seuil/5]=median(pourcentCatchMainSpeciesTotal) - } - } - nbMainSpeciesTotal=c(0,nbMainSpeciesTotal) - nbMainSpeciesTotal[length(nbMainSpeciesTotal)]=p-1 - namesMainSpeciesTotal=nomespsel[1:nbMainSpeciesTotal[length(nbMainSpeciesTotal)-1]] - propNbMainSpeciesTotal=nbMainSpeciesTotal[length(nbMainSpeciesTotal)-1]/nbAllSpecies*100 - - if (DiagFlag) medianPourcentCatchMainSpeciesTotal=c(0,medianPourcentCatchMainSpeciesTotal) - - Store(objects()) - gc(reset=TRUE) - - print(Sys.time()-t1) - - - - # METHOD : 'LOGEVENT' - - print("######## SPECIES EXPLORATION METHOD 3: 'LOGEVENT' ########") - - nbMainSpeciesLogevent=numeric() - medianPourcentCatchMainSpeciesLogevent=numeric() - - for(seuil in seq(5,100,5)){ - cat("seuil:",seuil,"\n") - nomespsel=character() - # We are taking all species with a % of catch >= seuil% for at least one logevent - for (i in nameSpecies) if (!is.na(any(propdat[,i]>=seuil)) && any(propdat[,i]>=seuil)) nomespsel <- c(nomespsel,i) - nbMainSpeciesLogevent[seuil/5]=length(nomespsel) - - # We are building the table with main 
species and aggregated other species - if(DiagFlag==TRUE) { - datSpeciesWithoutProp=building_tab_pca(dat[,2:p],nomespsel) - if(length(nomespsel)==1){ - vectorNul=rep(0,n) - datSpeciesWithoutProp=cbind(datSpeciesWithoutProp,vectorNul) - } - pourcentCatchMainSpeciesLogevent=apply(datSpeciesWithoutProp,1,sum)/apply(dat[,2:p],1,sum)*100 - medianPourcentCatchMainSpeciesLogevent[seuil/5]=median(pourcentCatchMainSpeciesLogevent) - } - } - nbMainSpeciesLogevent=c(p-1,nbMainSpeciesLogevent) - namesMainSpeciesLogevent=nomespsel - propNbMainSpeciesLogevent=nbMainSpeciesLogevent[length(nbMainSpeciesLogevent)]/nbAllSpecies*100 - - - if(DiagFlag) medianPourcentCatchMainSpeciesLogevent=c(100,medianPourcentCatchMainSpeciesLogevent) - - print(Sys.time()-t1) - - # GRAPHICS - - # Number of main species - X11(5,5) - plot(seq(0,100,5),nbMainSpeciesTotal,type='l',col="blue",lwd=3, axes=FALSE, xlab="Threshold (%)",ylab="Number of species") - lines(seq(0,100,5),nbMainSpeciesLogevent,col="green",lwd=3) - if(!is.na(nbMainSpeciesHAC)) segments(0,nbMainSpeciesHAC,100,nbMainSpeciesHAC,col="red",lwd=3) - axis(1) - axis(2, las=2) - box() - legend(20, p*0.9, c( "HAC", "PerTotal", "PerLogevent"),lwd=3,col=c("red", "blue", "green"),bty="n") - savePlot(filename = paste(analysisName,'Number of main species',sep="_"),type ="png") - dev.off() - - X11(5,5) - plot(seq(0,100,5),nbMainSpeciesTotal,type='l',col="blue",lwd=3, axes=FALSE, xlab="Threshold (%)",ylab="Number of species") - lines(seq(0,100,5),nbMainSpeciesLogevent,col="green",lwd=3) - if(!is.na(nbMainSpeciesHAC)) segments(0,nbMainSpeciesHAC,100,nbMainSpeciesHAC,col="red",lwd=3) - axis(1) - axis(2, las=2) - box() - legend(20, p*0.9, c( "HAC", "PerTotal", "PerLogevent"),lwd=3,col=c("red", "blue", "green"),bty="n") - savePlot(filename = paste(analysisName,'Number of main species',sep="_"),type ="png") - dev.off() - - # Black and white version - X11(5,5) - plot(seq(0,100,5),nbMainSpeciesTotal, type='l' ,lty='dashed', col="black",lwd=3, axes=FALSE, 
xlab="Threshold (%)",ylab="Number of species") - lines(seq(0,100,5),nbMainSpeciesLogevent, type='l', lty='dotted', col="black",lwd=3) - if(!is.na(nbMainSpeciesHAC)) segments(0,nbMainSpeciesHAC,100,nbMainSpeciesHAC,col="black",lwd=3) - axis(1) - axis(2, las=2) - box() - legend(20, p*0.9, c( "HAC", "PerTotal", "PerLogevent"),lwd=3,col=c("black", "black", "black"),bty="n",lty=c('solid','dashed','dotted'),box.lty = par("lty")) - savePlot(filename = paste(analysisName,'Number of main species_new_2',sep="_"),type ="png") - dev.off() - - # Median percentage of catch represented by main species by logevent - if(DiagFlag){ - png(paste(analysisName,"Median percentage of catch represented by main species by logevent.png",sep="_"), width = 1200, height = 800) - plot(seq(0,100,5),medianPourcentCatchMainSpeciesTotal,type='l',col="blue",lwd=2, main="Median percentage of catch represented by main species by logevent depending of the threshold", xlab="Threshold (%)",ylab="Median percentage of catch represented by main species by logevent") - lines(seq(0,100,5),medianPourcentCatchMainSpeciesLogevent,col="green",lwd=2) - if (RunHAC==TRUE) abline(medianPourcentCatchMainSpeciesHAC,0, col="red",lwd=2) - mtext(paste(p-1," Species"),col='darkblue') - if (RunHAC==TRUE) legend(70, 40, c("HAC", "Total", "Logevent"),lwd=2,col=c("red", "blue", "green")) - if (RunHAC==FALSE) legend(70, 40, c("Total", "Logevent"),lwd=2,col=c("blue", "green")) - dev.off() - } - - listSpecies=sort(unique(c(namesMainSpeciesHAC,namesMainSpeciesTotal,namesMainSpeciesLogevent))) - - # Proportion of the total catch represented by the species in listSpecies (= namesMainSpeciesAll) - catchListSpecies=sumcol[listSpecies] - propCatchListSpecies=sum(catchListSpecies)/sum(sumcol)*100 - - - if(DiagFlag==FALSE) { - explo_species = list(nbAllSpecies=nbAllSpecies, - propNbMainSpeciesHAC=propNbMainSpeciesHAC, - propNbMainSpeciesTotal=propNbMainSpeciesTotal, - propNbMainSpeciesLogevent=propNbMainSpeciesLogevent, - 
nbMainSpeciesHAC=nbMainSpeciesHAC, - nbMainSpeciesTotal=nbMainSpeciesTotal, - nbMainSpeciesLogevent=nbMainSpeciesLogevent, - namesMainSpeciesHAC=sort(namesMainSpeciesHAC), - namesMainSpeciesTotalAlphabetical=sort(namesMainSpeciesTotal), - namesMainSpeciesTotalByImportance=namesMainSpeciesTotal, - namesMainSpeciesLogevent=sort(namesMainSpeciesLogevent), - namesMainSpeciesAll=listSpecies, - propCatchMainSpeciesAll=propCatchListSpecies) - }else{ - explo_species = list(nbAllSpecies=nbAllSpecies, - propNbMainSpeciesHAC=propNbMainSpeciesHAC, - propNbMainSpeciesTotal=propNbMainSpeciesTotal, - propNbMainSpeciesLogevent=propNbMainSpeciesLogevent, - nbMainSpeciesHAC=nbMainSpeciesHAC, - nbMainSpeciesTotal=nbMainSpeciesTotal, - nbMainSpeciesLogevent=nbMainSpeciesLogevent, - namesMainSpeciesHAC=sort(namesMainSpeciesHAC), - namesMainSpeciesTotalAlphabetical=sort(namesMainSpeciesTotal), - namesMainSpeciesTotalByImportance=namesMainSpeciesTotal, - namesMainSpeciesLogevent=sort(namesMainSpeciesLogevent), - namesMainSpeciesAll=listSpecies, - medianPourcentCatchMainSpeciesHAC=median(pourcentCatchMainSpeciesHAC), - medianPourcentCatchMainSpeciesTotal=medianPourcentCatchMainSpeciesTotal, - medianPourcentCatchMainSpeciesLogevent=medianPourcentCatchMainSpeciesLogevent, - propCatchMainSpeciesAll=propCatchListSpecies) - } - - return(explo_species) - -} +################################################################################ +# EXPLORE THE SELECTED SPECIES DEPENDING ON THE METHOD (HAC, TOTALE, LOGEVENT) # +# AND THE THRESHOLD CHOSEN # +################################################################################ + + + +#' Identyfing in an EFLALO dataset which species can be considered as important +#' for the analysis of target species, by crossing three different approaches. +#' +#' A typical logbook dataset will contain a large number of species recorded, +#' but only a limited number of these could potentially be considered as target +#' species. 
This function aims thus at identifying these by using three +#' different approaches : - HAC (Hierarchical Ascending Classification) based +#' on Euclidian distances between species with Ward aggregating criteria; - +#' Total, where species are ranked based on their proportion in the total +#' catch, and those cumulating to a given percentage are retained - Logevent, +#' where species are selected if they represent at least a given percentage of +#' the logevent's catch for at least one logevent (one line) +#' +#' The HAC and the Logevent methods work on catch data transformed in +#' percentage of species by logevent (line), in order to remove the effect of +#' large hauls compared to small hauls. The Total method works on raw data. In +#' the HAC method, a first group of species, the residual ones, is identified +#' by clustering and using a first-order scree test for cutting the tree. +#' Other species are pooled in the group of principals. New similar HACs are +#' run through a loop on this group of residuals species, to identify if any +#' new species might have been left aside in the first run. It is important to +#' note though that HAC method might quickly reach memory limits on standard +#' PCs, and may thus not be run on very large datasets. In the Total method, +#' the percentage threshold is being increased with 5\% steps from 5 to 100, +#' and the ranked species summing up to this value is recorded. In the +#' Logevent method, the percentage threshold is also being increased with 5\% +#' steps from 5 to 100, and all species representing at least this value in at +#' least one line are recorded. +#' +#' This function allows thus to explore the variability and the sensitivity of +#' the definition of key species to differences in concepts and subjective +#' thresholds. 
A plot showing the number of species retained according to these +#' two criteria is produced, allowing the user to make a qualitative choice +#' when this will be used for identifying metiers. Empirical experience led us +#' to suggest that the combination of species entering either the HAC, or 95\% +#' of the total cumulated catch, or 100\% of at least one logevent would be +#' sufficient to cover the main target species of a standard logbook dataset, +#' but other choices could be made. +#' +#' +#' @param dat a data.frame reduced from an eflalo format. It should contain +#' only the LE_ID (Logevent ID) variable as well as all species names in +#' columns, with raw catch data. It is necessary to sort out potential +#' error-prone lines (such as lines with only 0) prior to the analysis, and to +#' replace NA values by 0. +#' @param analysisName character, the name of the run. Used for the file name +#' of the plots. +#' @param RunHAC Boolean. In case of very large datasets, memory limits might +#' be reached with HAC method. This option allows thus to skip the method and +#' consider species selection using the two other methods only. +#' @param DiagFlag Boolean. If DiagFlag=TRUE, additional plots and diagnostics +#' are produced. Not very used. +#' @return The function produces a plot (saved in the "analysisName" working +#' directory showing the number of species selected according to the method and +#' the percentage threshold selected for both 'Total' and 'Logevent' methods. 
+#' The function returns also a list of diagnostics of the three methods : +#' \item{nbAllSpecies}{Number of species initially in the dataset } +#' \item{propNbMainSpeciesHAC}{Proportion of the number of species retained by +#' the HAC method to the total number of species } +#' \item{propNbMainSpeciesTotal}{Proportion of the number of species retained +#' by the Total method to the total number of species } +#' \item{propNbMainSpeciesLogevent}{Proportion of the number of species +#' retained by the Logevent method to the total number of species } +#' \item{nbMainSpeciesHAC}{Number of species retained by the HAC method } +#' \item{nbMainSpeciesTotal}{Number of species retained by the Total method +#' with a percentage threshold increasing at 5\% step from 5\% to 100\% } +#' \item{nbMainSpeciesLogevent}{Number of species retained by the Logevent +#' method with a percentage threshold increasing at 5\% step from 5\% to 100\% +#' } \item{namesMainSpeciesHAC}{Names of species retained by the HAC method } +#' \item{namesMainSpeciesTotalAlphabetical}{Names of species retained by the +#' Total method with 95\% threshold in alphabetical order, for easier +#' comparison with the two other methods } +#' \item{namesMainSpeciesTotalByImportance}{Names of species retained by the +#' Total method with 95\% threshold in ranked order of importance } +#' \item{namesMainSpeciesLogevent}{Names of species retained by the Logevent +#' method with 100\% threshold } \item{namesMainSpeciesAll}{Unique combination +#' of the species retained in either HAC method, Total method with 95\% +#' threshold, and Logevent method with 100\% threshold } +#' \item{propCatchMainSpeciesAll}{Proportion of the total catch represented by +#' the selected species (species in namesMainSpeciesAll) } +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, 
Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @seealso \code{\link{extractTableMainSpecies}} +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @examples +#' +#' +#' +#' data(eflalo) +#' +#' eflalo <- formatEflalo(eflalo) +#' +#' eflalo <- eflalo[eflalo$LE_GEAR=="OTB",] +#' +#' # note that output plots will be sent to getwd() +#' analysisName <- "metier_analysis_OTB" +#' +#' dat <- eflalo[,c("LE_ID",grep("EURO",colnames(eflalo),value=TRUE))] +#' names(dat)[-1] <- unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) +#' +#' explo <- selectMainSpecies(dat, analysisName, RunHAC=TRUE, DiagFlag=FALSE) +#' #=> send the LE_ID and LE_EURO_SP columns only +#' +#' @export selectMainSpecies +selectMainSpecies=function(dat,analysisName="",RunHAC=TRUE,DiagFlag=FALSE){ + + require(FactoMineR) # function PCA + require(cluster) # functions pam & clara + require(SOAR) # function Store + require(amap) # function hcluster + require(MASS) # function lda + require(mda) # function fda + + p=ncol(dat) # Number of species + n=nrow(dat) + + # Transform quantities to proportions of total quantity caught by logevent + print("calculating proportions...") + + propdat=transformation_proportion(dat[,2:p]) + nameSpecies=colnames(propdat) + nbAllSpecies=ncol(propdat) + + t1=Sys.time() + + if (RunHAC == TRUE) { + + # METHOD : 'HAC' + + print("######## SPECIES EXPLORATION METHOD 1: HAC ########") + # Transposing data + table_var=table_variables(propdat) + + # HAC + print("cluster...") + cah_var=hcluster(table_var, method="euclidean", link="ward") + + Store(objects()) + gc(reset=TRUE) + + # Select the number of clusters by scree-test + inerties.vector=cah_var$height[order(cah_var$height,decreasing=TRUE)] + nb.finalclusters=which(scree(inerties.vector)[,"epsilon"]<0)[1] + + if(!is.na(nb.finalclusters)){ + # Dendogram cutting at the 
selected level + cah_cluster_var=cutree(cah_var,k=nb.finalclusters) + + png(paste(analysisName,"HAC_Dendogram_Step1.png",sep="_"), width = 1200, height = 800) + plot(cah_var,labels=FALSE,hang=-1,ann=FALSE) + title(main="HAC dendogram",xlab="Species",ylab="Height") + rect.hclust(cah_var, k=nb.finalclusters) + dev.off() + + temp=select_species(dat[,2:p],cah_cluster_var) + namesResidualSpecies=nameSpecies[which(cah_cluster_var==temp[[2]])] #list of residual species + + fait=FALSE + nb_cut=1 + while ((fait == FALSE) && (nb_cut < (p-nb.finalclusters-2))) { + # cutting below + print(paste("----------- nb_cut =",nb_cut)) + cah_cluster_var_step=cutree(cah_var,k=(nb.finalclusters+nb_cut)) + # testing residual species + print(paste("------------- Residual species cluster(s) ",unique(cah_cluster_var_step[namesResidualSpecies]))) + if (length(unique(cah_cluster_var_step[namesResidualSpecies]))==1) { + print(paste("------------- No residual cut -----")) + nb_cut = nb_cut+1 # cutting below + }else{ + print("------------- Residual cut -----") + nbSpeciesClusters=table(cah_cluster_var_step[namesResidualSpecies]) + # testing if a species is alone in a group + if (sort(nbSpeciesClusters)[1]>1) { # if not alone + print("------- I stop and have a beer ------") + fait = TRUE # then I stop + }else{ + print("------ Updating residual species -----") + nb_cut = nb_cut+1; # if alone then cutting below and updating nameResidualSpecies to start again + numGroupSpeciesAlone = as.numeric(names(sort(nbSpeciesClusters)[1])) + namesSpeciesAlone = names(cah_cluster_var_step)[which(cah_cluster_var_step==numGroupSpeciesAlone)] + namesResidualSpecies = namesResidualSpecies[ - which(namesResidualSpecies==namesSpeciesAlone)] + print(paste("---- Adding new species ---",namesSpeciesAlone)) + } + } + } # end of while + + + # If all species are selected step by step, the final k is the initial cut (nb.finalclusters) + if((nb.finalclusters+nb_cut)>=(p-2)){ + kFinal=nb.finalclusters + 
cah_cluster_var=cutree(cah_var,k=kFinal) + temp=select_species(dat[,2:p],cah_cluster_var) + namesResidualSpecies=nameSpecies[which(cah_cluster_var==temp[[2]])] #list of residual species + } + + + # Dendogram of the first cut in the residual species cluster + png(paste(analysisName,"HAC_Dendogram_Step1_ResidualSpecies.png",sep="_"), width = 1200, height = 800) + plot(cah_var,labels=FALSE,hang=-1,ann=FALSE) + title(main="HAC dendogram - Step",xlab="Species",ylab="Height") + if((nb.finalclusters+nb_cut)>=(p-2)){ + rect.hclust(cah_var, k=kFinal) + }else{ + rect.hclust(cah_var, k=(nb.finalclusters+nb_cut)) + } + dev.off() + + # Selection of main species + nomespsel=setdiff(nameSpecies,namesResidualSpecies) + cat("main species : ",nomespsel,"\n") + + # Return the dataset retaining only the main species + nbMainSpeciesHAC=length(nomespsel) + namesMainSpeciesHAC=nomespsel + propNbMainSpeciesHAC=nbMainSpeciesHAC/nbAllSpecies*100 + + if(DiagFlag==TRUE) { + datSpeciesWithoutProp=building_tab_pca(dat[,2:p],nomespsel) + pourcentCatchMainSpeciesHAC=apply(datSpeciesWithoutProp,1,sum)/apply(dat[,2:p],1,sum)*100 + medianPourcentCatchMainSpeciesHAC=median(pourcentCatchMainSpeciesHAC) + } + + Store(objects()) + gc(reset=TRUE) + + } else { + namesMainSpeciesHAC=NA; nbMainSpeciesHAC=as.numeric(NA); medianPourcentCatchMainSpeciesHAC=as.numeric(NA); propNbMainSpeciesHAC=NA + } + + print(Sys.time()-t1) + + }else{ namesMainSpeciesHAC=NA; nbMainSpeciesHAC=as.numeric(NA); medianPourcentCatchMainSpeciesHAC=as.numeric(NA); propNbMainSpeciesHAC=NA } + + + # METHOD : 'TOTALE' + + print("######## SPECIES EXPLORATION METHOD 2: 'TOTAL' ########") + + # Total quantity caught by species + sumcol=numeric(length=p-1) + for(i in 2:p){ + sumcol[i-1]=sum(dat[,i], na.rm=TRUE) + } + names(sumcol)=names(dat)[-1] + + # Percent of each species in the total catch + propesp=sumcol/sum(sumcol,na.rm=TRUE)*100 + # Columns number of each species by decreasing order of capture + numesp=order(propesp,decreasing=TRUE) 
+ # Percent of each species in the total catch by cumulated decreasing order + propesp=cumsum(propesp[order(propesp,decreasing=TRUE)]) + + # We are taking all species until having at least seuil% of total catch + nbMainSpeciesTotal=numeric() + medianPourcentCatchMainSpeciesTotal=numeric() + + for(seuil in seq(5,100,5)){ + cat("seuil:",seuil,"\n") + pourcent=which(propesp<=seuil) + # We are taking the name of selected species + espsel=numesp[1:(length(pourcent)+1)] + nomespsel=nameSpecies[espsel] + nbMainSpeciesTotal[seuil/5]=length(nomespsel) + + if(DiagFlag==TRUE) { + # We are building the table with main species and aggregated other species + datSpeciesWithoutProp=building_tab_pca(dat[,2:p],nomespsel) + if(length(nomespsel)==1){ + vectorNul=rep(0,n) + datSpeciesWithoutProp=cbind(datSpeciesWithoutProp,vectorNul) + } + pourcentCatchMainSpeciesTotal=apply(datSpeciesWithoutProp,1,sum, na.rm=TRUE)/apply(dat[,2:p],1,sum, na.rm=TRUE)*100 + medianPourcentCatchMainSpeciesTotal[seuil/5]=median(pourcentCatchMainSpeciesTotal) + } + } + nbMainSpeciesTotal=c(0,nbMainSpeciesTotal) + nbMainSpeciesTotal[length(nbMainSpeciesTotal)]=p-1 + namesMainSpeciesTotal=nomespsel[1:nbMainSpeciesTotal[length(nbMainSpeciesTotal)-1]] + propNbMainSpeciesTotal=nbMainSpeciesTotal[length(nbMainSpeciesTotal)-1]/nbAllSpecies*100 + + if (DiagFlag) medianPourcentCatchMainSpeciesTotal=c(0,medianPourcentCatchMainSpeciesTotal) + + Store(objects()) + gc(reset=TRUE) + + print(Sys.time()-t1) + + + + # METHOD : 'LOGEVENT' + + print("######## SPECIES EXPLORATION METHOD 3: 'LOGEVENT' ########") + + nbMainSpeciesLogevent=numeric() + medianPourcentCatchMainSpeciesLogevent=numeric() + + for(seuil in seq(5,100,5)){ + cat("seuil:",seuil,"\n") + nomespsel=character() + # We are taking all species with a % of catch >= seuil% for at least one logevent + for (i in nameSpecies) if (!is.na(any(propdat[,i]>=seuil)) && any(propdat[,i]>=seuil)) nomespsel <- c(nomespsel,i) + nbMainSpeciesLogevent[seuil/5]=length(nomespsel) + 
+ # We are building the table with main species and aggregated other species + if(DiagFlag==TRUE) { + datSpeciesWithoutProp=building_tab_pca(dat[,2:p],nomespsel) + if(length(nomespsel)==1){ + vectorNul=rep(0,n) + datSpeciesWithoutProp=cbind(datSpeciesWithoutProp,vectorNul) + } + pourcentCatchMainSpeciesLogevent=apply(datSpeciesWithoutProp,1,sum)/apply(dat[,2:p],1,sum)*100 + medianPourcentCatchMainSpeciesLogevent[seuil/5]=median(pourcentCatchMainSpeciesLogevent) + } + } + nbMainSpeciesLogevent=c(p-1,nbMainSpeciesLogevent) + namesMainSpeciesLogevent=nomespsel + propNbMainSpeciesLogevent=nbMainSpeciesLogevent[length(nbMainSpeciesLogevent)]/nbAllSpecies*100 + + + if(DiagFlag) medianPourcentCatchMainSpeciesLogevent=c(100,medianPourcentCatchMainSpeciesLogevent) + + print(Sys.time()-t1) + + # GRAPHICS + + # Number of main species + X11(5,5) + plot(seq(0,100,5),nbMainSpeciesTotal,type='l',col="blue",lwd=3, axes=FALSE, xlab="Threshold (%)",ylab="Number of species") + lines(seq(0,100,5),nbMainSpeciesLogevent,col="green",lwd=3) + if(!is.na(nbMainSpeciesHAC)) segments(0,nbMainSpeciesHAC,100,nbMainSpeciesHAC,col="red",lwd=3) + axis(1) + axis(2, las=2) + box() + legend(20, p*0.9, c( "HAC", "PerTotal", "PerLogevent"),lwd=3,col=c("red", "blue", "green"),bty="n") + savePlot(filename = paste(analysisName,'Number of main species',sep="_"),type ="png") + dev.off() + + X11(5,5) + plot(seq(0,100,5),nbMainSpeciesTotal,type='l',col="blue",lwd=3, axes=FALSE, xlab="Threshold (%)",ylab="Number of species") + lines(seq(0,100,5),nbMainSpeciesLogevent,col="green",lwd=3) + if(!is.na(nbMainSpeciesHAC)) segments(0,nbMainSpeciesHAC,100,nbMainSpeciesHAC,col="red",lwd=3) + axis(1) + axis(2, las=2) + box() + legend(20, p*0.9, c( "HAC", "PerTotal", "PerLogevent"),lwd=3,col=c("red", "blue", "green"),bty="n") + savePlot(filename = paste(analysisName,'Number of main species',sep="_"),type ="png") + dev.off() + + # Black and white version + X11(5,5) + plot(seq(0,100,5),nbMainSpeciesTotal, type='l' 
,lty='dashed', col="black",lwd=3, axes=FALSE, xlab="Threshold (%)",ylab="Number of species") + lines(seq(0,100,5),nbMainSpeciesLogevent, type='l', lty='dotted', col="black",lwd=3) + if(!is.na(nbMainSpeciesHAC)) segments(0,nbMainSpeciesHAC,100,nbMainSpeciesHAC,col="black",lwd=3) + axis(1) + axis(2, las=2) + box() + legend(20, p*0.9, c( "HAC", "PerTotal", "PerLogevent"),lwd=3,col=c("black", "black", "black"),bty="n",lty=c('solid','dashed','dotted'),box.lty = par("lty")) + savePlot(filename = paste(analysisName,'Number of main species_new_2',sep="_"),type ="png") + dev.off() + + # Median percentage of catch represented by main species by logevent + if(DiagFlag){ + png(paste(analysisName,"Median percentage of catch represented by main species by logevent.png",sep="_"), width = 1200, height = 800) + plot(seq(0,100,5),medianPourcentCatchMainSpeciesTotal,type='l',col="blue",lwd=2, main="Median percentage of catch represented by main species by logevent depending of the threshold", xlab="Threshold (%)",ylab="Median percentage of catch represented by main species by logevent") + lines(seq(0,100,5),medianPourcentCatchMainSpeciesLogevent,col="green",lwd=2) + if (RunHAC==TRUE) abline(medianPourcentCatchMainSpeciesHAC,0, col="red",lwd=2) + mtext(paste(p-1," Species"),col='darkblue') + if (RunHAC==TRUE) legend(70, 40, c("HAC", "Total", "Logevent"),lwd=2,col=c("red", "blue", "green")) + if (RunHAC==FALSE) legend(70, 40, c("Total", "Logevent"),lwd=2,col=c("blue", "green")) + dev.off() + } + + listSpecies=sort(unique(c(namesMainSpeciesHAC,namesMainSpeciesTotal,namesMainSpeciesLogevent))) + + # Proportion of the total catch represented by the species in listSpecies (= namesMainSpeciesAll) + catchListSpecies=sumcol[listSpecies] + propCatchListSpecies=sum(catchListSpecies)/sum(sumcol)*100 + + + if(DiagFlag==FALSE) { + explo_species = list(nbAllSpecies=nbAllSpecies, + propNbMainSpeciesHAC=propNbMainSpeciesHAC, + propNbMainSpeciesTotal=propNbMainSpeciesTotal, + 
propNbMainSpeciesLogevent=propNbMainSpeciesLogevent, + nbMainSpeciesHAC=nbMainSpeciesHAC, + nbMainSpeciesTotal=nbMainSpeciesTotal, + nbMainSpeciesLogevent=nbMainSpeciesLogevent, + namesMainSpeciesHAC=sort(namesMainSpeciesHAC), + namesMainSpeciesTotalAlphabetical=sort(namesMainSpeciesTotal), + namesMainSpeciesTotalByImportance=namesMainSpeciesTotal, + namesMainSpeciesLogevent=sort(namesMainSpeciesLogevent), + namesMainSpeciesAll=listSpecies, + propCatchMainSpeciesAll=propCatchListSpecies) + }else{ + explo_species = list(nbAllSpecies=nbAllSpecies, + propNbMainSpeciesHAC=propNbMainSpeciesHAC, + propNbMainSpeciesTotal=propNbMainSpeciesTotal, + propNbMainSpeciesLogevent=propNbMainSpeciesLogevent, + nbMainSpeciesHAC=nbMainSpeciesHAC, + nbMainSpeciesTotal=nbMainSpeciesTotal, + nbMainSpeciesLogevent=nbMainSpeciesLogevent, + namesMainSpeciesHAC=sort(namesMainSpeciesHAC), + namesMainSpeciesTotalAlphabetical=sort(namesMainSpeciesTotal), + namesMainSpeciesTotalByImportance=namesMainSpeciesTotal, + namesMainSpeciesLogevent=sort(namesMainSpeciesLogevent), + namesMainSpeciesAll=listSpecies, + medianPourcentCatchMainSpeciesHAC=median(pourcentCatchMainSpeciesHAC), + medianPourcentCatchMainSpeciesTotal=medianPourcentCatchMainSpeciesTotal, + medianPourcentCatchMainSpeciesLogevent=medianPourcentCatchMainSpeciesLogevent, + propCatchMainSpeciesAll=propCatchListSpecies) + } + + return(explo_species) + +} diff --git a/vmstools/R/select_species.r b/vmstools/R/select_species.r index 229054e..1ab36ef 100644 --- a/vmstools/R/select_species.r +++ b/vmstools/R/select_species.r @@ -1,17 +1,45 @@ -######################################################## -# Remove the cluster with the smallest mean of capture # -######################################################## - -select_species=function(data,groupes_cah){ - nb.classes=length(levels(as.factor(groupes_cah))) - moyennes=numeric(nb.classes) - for(i in 1:nb.classes){ - namegp=names(which(groupes_cah==i)) - effgp=length(which(groupes_cah==i)) - 
moyennes[i]=sum(data[namegp],na.rm=TRUE)/effgp - } - indice.autre=which(moyennes == min(moyennes,na.rm=TRUE)) - noms=names(which(groupes_cah!=indice.autre)) - return(list(noms,indice.autre)) -} - +######################################################## +# Remove the cluster with the smallest mean of capture # +######################################################## + + + +#' Useful functions for the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. +#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export select_species +select_species=function(data,groupes_cah){ + nb.classes=length(levels(as.factor(groupes_cah))) + moyennes=numeric(nb.classes) + for(i in 1:nb.classes){ + namegp=names(which(groupes_cah==i)) + effgp=length(which(groupes_cah==i)) + moyennes[i]=sum(data[namegp],na.rm=TRUE)/effgp + } + indice.autre=which(moyennes == min(moyennes,na.rm=TRUE)) + noms=names(which(groupes_cah!=indice.autre)) + return(list(noms,indice.autre)) +} + diff --git a/vmstools/R/shortenEflalo.r b/vmstools/R/shortenEflalo.r index 1e8bd99..ed5d8bb 100644 --- a/vmstools/R/shortenEflalo.r +++ b/vmstools/R/shortenEflalo.r @@ -1,9 +1,27 @@ -shortenEflalo <- function(data=eflalo2,which.species = c("ANE","BIB","BLL","COD","DAB","HAD","HER","MAC","NEP","PLE","SOL","WHG") ) -{ -#Eflalo is an unwieldy format so this is handy if you want to select a few species -dn <- dimnames(data)[[2]] -yp <- NULL -for(ss in which.species){yp <- c(yp,grep(ss,dn)) } -short.eflalo2 <- data[,c(1:26,yp)] -short.eflalo2 -} \ No newline at end of file +#' Cuts up eflalo according to any particular combination of species to make it +#' more manageable +#' +#' Cuts up eflalo according to any particular combination of species to make it +#' more manageable +#' +#' +#' @param data eflalo formatted data +#' @param which.species array of species names in FAO species names +#' @author Doug Beare +#' @references EU lot 2 project +#' @examples +#' +#' data(eflalo) +#' shortenEflalo(data = eflalo, which.species = c("PLE")) +#' +#' +#' @export shortenEflalo +shortenEflalo <- function(data=eflalo2,which.species = c("ANE","BIB","BLL","COD","DAB","HAD","HER","MAC","NEP","PLE","SOL","WHG") ) +{ +#Eflalo is an unwieldy format so this is handy if you want to select a few species +dn <- dimnames(data)[[2]] +yp <- NULL +for(ss in which.species){yp <- c(yp,grep(ss,dn)) } +short.eflalo2 <- data[,c(1:26,yp)] +short.eflalo2 +} diff --git 
a/vmstools/R/sortTacsat.r b/vmstools/R/sortTacsat.r index 2425304..8aab895 100644 --- a/vmstools/R/sortTacsat.r +++ b/vmstools/R/sortTacsat.r @@ -1,13 +1,34 @@ -`sortTacsat` <- -function(dat){ -require(doBy) - -if(!"SI_DATIM" %in% colnames(dat)) dat$SI_DATIM <- as.POSIXct(paste(dat$SI_DATE, dat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - - #Sort the tacsat data first by ship, then by date -if("VE_REF" %in% colnames(dat)) dat <- orderBy(~VE_REF+SI_DATIM,data=dat) -if("OB_REF" %in% colnames(dat)) dat <- orderBy(~OB_REF+SI_DATIM,data=dat) - -return(dat)} - - \ No newline at end of file +#' Sorting Tacsat dataframe +#' +#' Sort the Tacsat data first by vessel, then by date, speed and heading. Needs +#' to be in this order to be effectively used in other EU lot 2 project generic +#' functions. +#' +#' +#' @param dat tacsat dataframe +#' @note Uses library(doBy) +#' @author Niels T. Hintzen +#' @seealso \code{\link{filterTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' require(doBy) +#' +#' #Sort the Tacsat data +#' tacsat <- sortTacsat(tacsat) +#' +#' @export sortTacsat +`sortTacsat` <- +function(dat){ +require(doBy) + +if(!"SI_DATIM" %in% colnames(dat)) dat$SI_DATIM <- as.POSIXct(paste(dat$SI_DATE, dat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + + #Sort the tacsat data first by ship, then by date +if("VE_REF" %in% colnames(dat)) dat <- orderBy(~VE_REF+SI_DATIM,data=dat) +if("OB_REF" %in% colnames(dat)) dat <- orderBy(~OB_REF+SI_DATIM,data=dat) + +return(dat)} + + diff --git a/vmstools/R/splitAmongPings.r b/vmstools/R/splitAmongPings.r index 41ceff0..e160046 100644 --- a/vmstools/R/splitAmongPings.r +++ b/vmstools/R/splitAmongPings.r @@ -1,292 +1,353 @@ -splitAmongPings <- function(tacsat,eflalo,variable="all",level="day",conserve=TRUE,by=NULL,returnAll=T){ - - #level: day,ICESrectangle,trip - #conserve: T,F - #variable: kgs,value,effort - require(data.table) - #- Create extra columns with time stamps - 
if(!"FT_REF" %in% colnames(tacsat)) stop("tacsat file needs FT_REF detailing trip number") - if(!"SI_STATE" %in% colnames(tacsat)) stop("tacsat file needs SI_STATE detailing activity of vessel") - if(level == "trip" & conserve == TRUE) stop("conserve catches only at level = ICESrectangle or day") - - if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - if(!"LE_CDATIM" %in% colnames(eflalo)) eflalo$LE_CDATIM <- as.POSIXct(eflalo$LE_CDAT, tz="GMT", format="%d/%m/%Y") - - if(is.null(by)==FALSE){ - if(any(is.na(tacsat[,by])) | any(tacsat[,by] == 0)) stop("'by' column in tacsat contains NA or zero's. Cannot execute with NA's or zeros") - } - - #- Levels have hierachy, and need to be suplemented with lower levels - if(level == "day"){ level <- c("day","ICESrectangle","trip") - } else { - if(level == "ICESrectangle"){ level <- c("ICESrectangle","trip") - } else { - if(level == "trip"){ - level <- c("trip") - } - } - } - - #- Add ID to keep track of merged and non-merged sets - tacsat$ID <- 1:nrow(tacsat) - - #- identifyers of eflalo colnames - eflaloCol <- colnames(eflalo) - kgs <- grep("LE_KG",colnames(eflalo)) - eur <- grep("LE_EURO",colnames(eflalo)) - - #- Subset tacsat file - remtacsat <- subset(tacsat,SI_STATE==0) - tacsat <- subset(tacsat,SI_STATE != 0) #only attribute variable to fishing pings - tacsatTrip <- subset(tacsat,FT_REF != 0) - remainTacsat <- sort(unique(tacsatTrip$ID)) - - #- Subset eflalo file - eflalo$ID <- 1:nrow(eflalo) - eflaloTrip <- subset(eflalo, FT_REF %in% sort(unique(tacsatTrip$FT_REF)) & VE_REF %in% sort(unique(tacsatTrip$VE_REF))) - #eflaloNoTrip <- subset(eflalo, !FT_REF %in% sort(unique(tacsatTrip$FT_REF))) - eflaloNoTrip <- eflalo[which(!eflalo$ID %in% eflaloTrip$ID),-match("ID",colnames(eflalo))] - #eflaloVessel <- subset(eflaloNoTrip, VE_REF %in% sort(unique(tacsatTrip$VE_REF))) - #eflaloNoVessel <- subset(eflaloNoTrip,!VE_REF %in% 
sort(unique(tacsatTrip$VE_REF))) - eflaloVessel <- eflaloNoTrip[which(paste(eflaloNoTrip$VE_REF,format(eflaloNoTrip$LE_CDATIM,"%Y")) %in% unique(paste(tacsatTrip$VE_REF,format(tacsatTrip$SI_DATIM,"%Y")))),] - eflaloNoVessel <- eflaloNoTrip[which(!paste(eflaloNoTrip$VE_REF,format(eflaloNoTrip$LE_CDATIM,"%Y")) %in% unique(paste(tacsatTrip$VE_REF,format(tacsatTrip$SI_DATIM,"%Y")))),] - - #------------------------------------------------------------------------------- - # 1a) Merge eflalo to tacsat with matching FT_REF - #------------------------------------------------------------------------------- - - if(dim(tacsatTrip)[1]>0 & dim(eflaloTrip)[1] >0){ - if("day" %in% level){ - print("level: day") - if(!"SI_YEAR" %in% colnames(tacsatTrip)) tacsatTrip$SI_YEAR <- an(format(tacsatTrip$SI_DATIM,format="%Y")) - if(!"SI_DAY" %in% colnames(tacsatTrip)) tacsatTrip$SI_DAY <- an(format(tacsatTrip$SI_DATIM,format="%j")) - if(!"LE_RECT" %in% colnames(tacsatTrip)) tacsatTrip$LE_RECT <- ICESrectangle(tacsatTrip) - - if(!"SI_YEAR" %in% colnames(eflaloTrip)) eflaloTrip$SI_YEAR <- an(format(eflaloTrip$LE_CDATIM,format="%Y")) - if(!"SI_DAY" %in% colnames(eflaloTrip)) eflaloTrip$SI_DAY <- an(format(eflaloTrip$LE_CDATIM,format="%j")) - - #- Count pings in tacsat set - nPings <- countPings(~year+VE_REF+FT_REF+icesrectangle+day,tacsatTrip,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloTrip,tacsatTrip,nPings,c("SI_YEAR","VE_REF","FT_REF","LE_RECT","SI_DAY"),eflaloCol[c(kgs,eur)],remainTacsat,by=by) - eflaloTrip <- res[["eflalo"]] - byDayTacsat <- res[["tacsat"]] - remainTacsat <- res[["remainTacsat"]] - } - if("ICESrectangle" %in% level){ - print("level: rectangle") - if(!"SI_YEAR" %in% colnames(tacsatTrip)) tacsatTrip$SI_YEAR <- an(format(tacsatTrip$SI_DATIM,format="%Y")) - if(!"LE_RECT" %in% colnames(tacsatTrip)) tacsatTrip$LE_RECT <- ICESrectangle(tacsatTrip) - - if(!"SI_YEAR" %in% colnames(eflaloTrip)) eflaloTrip$SI_YEAR <- 
an(format(eflaloTrip$LE_CDATIM,format="%Y")) - - #- Count pings in tacsat set - nPings <- countPings(~year+VE_REF+FT_REF+icesrectangle,tacsatTrip,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloTrip,tacsatTrip,nPings,c("SI_YEAR","VE_REF","FT_REF","LE_RECT"), eflaloCol[c(kgs,eur)],remainTacsat,by=by) - eflaloTrip <- res[["eflalo"]] - byRectTacsat <- res[["tacsat"]] - remainTacsat <- res[["remainTacsat"]] - } - if("trip" %in% level){ - print("level: trip") - if(!"SI_YEAR" %in% colnames(tacsatTrip)) tacsatTrip$SI_YEAR <- an(format(tacsatTrip$SI_DATIM,format="%Y")) - if(!"SI_YEAR" %in% colnames(eflaloTrip)) eflaloTrip$SI_YEAR <- an(format(eflaloTrip$LE_CDATIM,format="%Y")) - - #- Count pings in tacsat set - nPings <- countPings(~year+VE_REF+FT_REF,tacsatTrip,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloTrip,tacsatTrip,nPings,c("SI_YEAR","VE_REF","FT_REF"), eflaloCol[c(kgs,eur)],remainTacsat,by=by) - eflaloTrip <- res[["eflalo"]] - byTripTacsat <- res[["tacsat"]] - remainTacsat <- res[["remainTacsat"]] - } - #------------------------------------------------------------------------------- - # 1b) Bind all tacsat files with matching FT_REF - #------------------------------------------------------------------------------- - - if(length(remainTacsat) > 0) warning("Not all tacsat records with tripnumber have been merged!!") - if(nrow(eflaloTrip) > 0) warning("Not all eflalo records with matching VMS tripnumber have been merged!!") - if("day" %in% level){ tacsatFTREF <- rbind(byDayTacsat,byRectTacsat,byTripTacsat) - } else { - if("ICESrectangle" %in% level){ tacsatFTREF <- rbind(byRectTacsat,byTripTacsat) - } else { tacsatFTREF <- byTripTacsat}} - tacsatFTREF[,kgeur(colnames(tacsatFTREF))] <- sweep(tacsatFTREF[,kgeur(colnames(tacsatFTREF))],1,tacsatFTREF$pings,"/") - tacsatFTREF$ID <- af(ac(tacsatFTREF$ID.x)) - DT <- data.table(tacsatFTREF) - eq1 <- 
c.listquote(paste("sum(",colnames(tacsatFTREF[,kgeur(colnames(tacsatFTREF))]),",na.rm=TRUE)",sep="")) - tacsatFTREF <- DT[,eval(eq1),by=ID.x]; tacsatFTREF <- data.frame(tacsatFTREF); setnames(tacsatFTREF,colnames(tacsatFTREF),c("ID",colnames(eflaloTrip[,kgeur(colnames(eflaloTrip))]))) - } - - #------------------------------------------------------------------------------- - # 2a) Merge eflalo to tacsat with no matching FT_REF - #------------------------------------------------------------------------------- - - #- If you don't want to loose catch or value data, conserve the non-merged - # eflalo catches and distribute these over the tacsat records - if(conserve == TRUE){ - if(dim(tacsat)[1]>0 & dim(eflaloVessel)[1] > 0){ - - #------------------------------------------------------------------------------- - # 2a-1) Merge eflalo to tacsat with matching VE_REF - #------------------------------------------------------------------------------- - - if("day" %in% level){ - print("level: day & conserve = T, by vessel") - if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y")) - if(!"SI_DAY" %in% colnames(tacsat)) tacsat$SI_DAY <- an(format(tacsat$SI_DATIM,format="%j")) - if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat) - - if(!"SI_YEAR" %in% colnames(eflaloVessel)) eflaloVessel$SI_YEAR <- an(format(eflaloVessel$LE_CDATIM,format="%Y")) - if(!"SI_DAY" %in% colnames(eflaloVessel)) eflaloVessel$SI_DAY <- an(format(eflaloVessel$LE_CDATIM,format="%j")) - - #- Count pings in tacsat set - nPings <- countPings(~year+VE_REF+icesrectangle+day,tacsat,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloVessel,tacsat,nPings,c("SI_YEAR","VE_REF","LE_RECT","SI_DAY"), eflaloCol[c(kgs,eur)],NULL,by=by) - eflaloVessel <- res[["eflalo"]] - byDayTacsat <- res[["tacsat"]] - } - - if("ICESrectangle" %in% level){ - print("level: rectangle & conserve = T, by vessel") - if(!"SI_YEAR" %in% colnames(tacsat)) 
tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y")) - if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat) - - if(!"SI_YEAR" %in% colnames(eflaloVessel)) eflaloVessel$SI_YEAR <- an(format(eflaloVessel$LE_CDATIM,format="%Y")) - - #- Count pings in tacsat set - nPings <- countPings(~year+VE_REF+icesrectangle,tacsat,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloVessel,tacsat,nPings,c("SI_YEAR","VE_REF","LE_RECT"), eflaloCol[c(kgs,eur)],NULL,by=by) - eflaloVessel <- res[["eflalo"]] - byRectTacsat <- res[["tacsat"]] - } - if(TRUE){ #-For remainder of vessel merging not at ICESrectangle level - print("level: year & conserve = T, by vessel") - if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y")) - if(!"SI_YEAR" %in% colnames(eflaloVessel)) eflaloVessel$SI_YEAR <- an(format(eflaloVessel$LE_CDATIM,format="%Y")) - - #- Count pings in tacsat set - nPings <- countPings(~year+VE_REF,tacsat,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloVessel,tacsat,nPings,c("SI_YEAR","VE_REF" ), eflaloCol[c(kgs,eur)],NULL,by=by) - eflaloVessel <- res[["eflalo"]] - byVessTacsat <- res[["tacsat"]] - } - - #------------------------------------------------------------------------------- - # 2b-1) Bind all tacsat files with matching VE_REF - #------------------------------------------------------------------------------- - - if("day" %in% level){ tacsatVEREF <- rbind(byDayTacsat,byRectTacsat,byVessTacsat) - } else { - if("ICESrectangle" %in% level){ - tacsatVEREF <- rbind(byRectTacsat,byVessTacsat) - } else { tacsatVEREF <- byVessTacsat}} - tacsatVEREF[,kgeur(colnames(tacsatVEREF))] <- sweep(tacsatVEREF[,kgeur(colnames(tacsatVEREF))],1,tacsatVEREF$pings,"/") - tacsatVEREF$ID <- af(ac(tacsatVEREF$ID.x)) - DT <- data.table(tacsatVEREF) - eq1 <- c.listquote(paste("sum(",colnames(tacsatVEREF[,kgeur(colnames(tacsatVEREF))]),",na.rm=TRUE)",sep="")) - tacsatVEREF <- 
DT[,eval(eq1),by=ID.x]; tacsatVEREF <- data.frame(tacsatVEREF); setnames(tacsatVEREF,colnames(tacsatVEREF),c("ID",colnames(eflaloVessel[,kgeur(colnames(eflaloVessel))]))) - } - - if(dim(tacsat)[1] > 0 & dim(eflaloNoVessel)[1] > 0){ - #------------------------------------------------------------------------------- - # 2a-2) Merge eflalo to tacsat with no matching FT_REF or VE_REF - #------------------------------------------------------------------------------- - if("day" %in% level){ - print("level: day & conserve = T, no vessel match") - if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y")) - if(!"SI_DAY" %in% colnames(tacsat)) tacsat$SI_DAY <- an(format(tacsat$SI_DATIM,format="%j")) - if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat) - - if(!"SI_YEAR" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_YEAR <- an(format(eflaloNoVessel$LE_CDATIM,format="%Y")) - if(!"SI_DAY" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_DAY <- an(format(eflaloNoVessel$LE_CDATIM,format="%j")) - - #- Count pings in tacsat set - nPings <- countPings(~year+icesrectangle+day,tacsat,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloNoVessel,tacsat,nPings,c("SI_YEAR","LE_RECT","SI_DAY"), eflaloCol[c(kgs,eur)],NULL,by=by) - eflaloNoVessel <- res[["eflalo"]] - byDayTacsat <- res[["tacsat"]] - } - - if("ICESrectangle" %in% level){ - print("level: rectangle & conserve = T, no vessel match") - if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y")) - if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat) - - if(!"SI_YEAR" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_YEAR <- an(format(eflaloNoVessel$LE_CDATIM,format="%Y")) - - #- Count pings in tacsat set - nPings <- countPings(~year+icesrectangle,tacsat,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloNoVessel,tacsat,nPings,c("SI_YEAR","LE_RECT"), 
eflaloCol[c(kgs,eur)],NULL,by=by) - eflaloNoVessel <- res[["eflalo"]] - byRectTacsat <- res[["tacsat"]] - } - if(TRUE){ #-For remainder of merging not at ICESrectangle level - print("level: year & conserve = T, no vessel match") - if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y")) - if(!"SI_YEAR" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_YEAR <- an(format(eflaloNoVessel$LE_CDATIM,format="%Y")) - - #- Count pings in tacsat set - nPings <- countPings(~year,tacsat,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloNoVessel,tacsat,nPings,c("SI_YEAR"), eflaloCol[c(kgs,eur)],NULL,by=by) - eflaloNoVessel <- res[["eflalo"]] - byVessTacsat <- res[["tacsat"]] - } - #------------------------------------------------------------------------------- - # 2b-2) Bind all tacsat files with no matching FT_REF or VE_REF - #------------------------------------------------------------------------------- - - if("day" %in% level){ tacsatREF <- rbind(byDayTacsat,byRectTacsat,byVessTacsat) - } else { - if("ICESrectangle" %in% level){ tacsatREF <- rbind(byRectTacsat,byVessTacsat) - } else { tacsatREF <- byVessTacsat}} - tacsatREF[,kgeur(colnames(tacsatREF))] <- sweep(tacsatREF[,kgeur(colnames(tacsatREF))],1,tacsatREF$pings,"/") - tacsatREF$ID <- af(ac(tacsatREF$ID.x)) - DT <- data.table(tacsatREF) - eq1 <- c.listquote(paste("sum(",colnames(tacsatREF[,kgeur(colnames(tacsatREF))]),",na.rm=TRUE)",sep="")) - tacsatREF <- DT[,eval(eq1),by=ID.x]; tacsatREF <- data.frame(tacsatREF); setnames(tacsatREF,colnames(tacsatREF),c("ID",colnames(eflaloVessel[,kgeur(colnames(eflaloVessel))]))) - } - }#End conserve - - #------------------------------------------------------------------------------- - # 3) Merge all tacsat files together and return - #------------------------------------------------------------------------------- - - if(conserve==TRUE){ - if(exists("tacsatFTREF")){one <- tacsatFTREF} else{ one <- numeric()} - 
#' Split values or landings from eflalo over tacsat pings
#'
#' Split the values or landings as listed in the eflalo file over the tacsat
#' pings, while taking different levels into account such as by day,
#' ICESrectangle or by trip number. Also there is a possibility to merge the
#' eflalo records without a matching tacsat trip.
#'
#' Levels have hierarchy, so if "day" is specified, also "ICESrectangle" and
#' "trip" will be used. If "ICESrectangle" is specified also "trip" will be
#' used. "Trip" can be used on its own. Same hierarchy applies to merging when
#' conserve = TRUE (except for trip level).
#'
#' Note that the tacsat file needs a column SI_STATE which has 0 for
#' non-fishing and 1 for fishing records.
#'
#' @param tacsat Tacsat object
#' @param eflalo Eflalo object
#' @param variable Indicating what to split: "all","value","kgs"
#' @param level Levels can be: "day", "ICESrectangle", "trip"
#' @param conserve Logical, if kgs or value needs to be conserved if merging by
#'   trip number is not possible (default = TRUE)
#' @param by Name of tacsat column by which KG and EURO should be dispatched.
#'   Default to NULL which distributes KG and EURO equally by each ping. A
#'   tacsat column can be used instead to generate a 'weighted' dispatch of KG
#'   and EURO.
#' @param returnAll Logical, whether all non-fishing pings should be returned
#'   as well (default = TRUE)
#' @return Merged tacsat file will be returned including the split values
#'   over the tacsat pings where SI_STATE is not zero.
#' @author Niels T. Hintzen, Francois Bastardie
#' @seealso \code{\link{mergeEflalo2Tacsat}}, \code{\link{mergeEflalo2Pings}}
#' @references EU Lot 2 project
#' @examples
#'
#' data(tacsat); tacsat <- tacsat[1:1000,]
#' data(eflalo); eflalo <- eflalo[1:1000,]
#'
#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat)
#'
#' #- Create a column named SI_STATE which holds values 0 or 1 which denotes no
#' #  fishing & fishing.
#' tacsatp$IDX <- 1:nrow(tacsatp)
#' tacsatFilter <- filterTacsat(tacsatp,st=c(1,6),hd=NULL,remDup=TRUE)
#' tacsatp$SI_STATE <- 0
#' tacsatp$SI_STATE[tacsatFilter$IDX] <- 1
#'
#' #-Add interval to tacsatp
#' tacsatp <- intervalTacsat(tacsatp,level="trip",fill.na=TRUE)
#'
#' tacsatp <- subset(tacsatp,SI_STATE == 1)
#'
#' tacsatEflalo <- splitAmongPings(tacsat=tacsatp,eflalo=eflalo,
#'                   variable="all",level="day",conserve=TRUE)
#'
#' #- When using the 'by' statement, make sure the by column does not contain NA,
#' #  or zeros
#' tacsatp <- subset(tacsatp,!is.na(tacsatp$INTV) | tacsatp$INTV != 0)
#' tacsatEflalo <- splitAmongPings(tacsat=tacsatp,eflalo=eflalo,
#'                   variable="all",level="day",conserve=TRUE,by="INTV")
#'
#' @export splitAmongPings
splitAmongPings <- function(tacsat,eflalo,variable="all",level="day",conserve=TRUE,by=NULL,returnAll=T){

  #level: day,ICESrectangle,trip
  #conserve: T,F
  #variable: kgs,value,effort
  require(data.table)

  #- Sanity checks: trip number and activity state columns are mandatory
  if(!"FT_REF" %in% colnames(tacsat)) stop("tacsat file needs FT_REF detailing trip number")
  if(!"SI_STATE" %in% colnames(tacsat)) stop("tacsat file needs SI_STATE detailing activity of vessel")
  if(level == "trip" & conserve == TRUE) stop("conserve catches only at level = ICESrectangle or day")

  #- Create extra columns with time stamps (ping time in tacsat, catch date in eflalo)
  if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
  if(!"LE_CDATIM" %in% colnames(eflalo)) eflalo$LE_CDATIM <- as.POSIXct(eflalo$LE_CDAT, tz="GMT", format="%d/%m/%Y")

  #- Weighted dispatch is undefined for NA / zero weights, so refuse them up front
  if(is.null(by)==FALSE){
    if(any(is.na(tacsat[,by])) | any(tacsat[,by] == 0)) stop("'by' column in tacsat contains NA or zero's. Cannot execute with NA's or zeros")
  }

  #- Levels have hierarchy, and need to be supplemented with lower levels
  if(level == "day"){ level <- c("day","ICESrectangle","trip")
  } else {
      if(level == "ICESrectangle"){ level <- c("ICESrectangle","trip")
      } else {
          if(level == "trip"){
            level <- c("trip")
          }
        }
    }

  #- Add ID to keep track of merged and non-merged sets
  tacsat$ID <- 1:nrow(tacsat)

  #- Identifiers (column indices) of the eflalo landings / value columns
  eflaloCol <- colnames(eflalo)
  kgs       <- grep("LE_KG",colnames(eflalo))
  eur       <- grep("LE_EURO",colnames(eflalo))

  #- Subset tacsat file: keep fishing pings only, and split off pings with a trip number
  remtacsat   <- subset(tacsat,SI_STATE==0)
  tacsat      <- subset(tacsat,SI_STATE != 0) #only attribute variable to fishing pings
  tacsatTrip  <- subset(tacsat,FT_REF != 0)
  remainTacsat <- sort(unique(tacsatTrip$ID))

  #- Subset eflalo file into: trips matching tacsat (eflaloTrip), same
  #  vessel-year but no trip match (eflaloVessel), and no vessel-year match
  #  at all (eflaloNoVessel)
  eflalo$ID  <- 1:nrow(eflalo)
  eflaloTrip <- subset(eflalo, FT_REF %in% sort(unique(tacsatTrip$FT_REF)) & VE_REF %in% sort(unique(tacsatTrip$VE_REF)))
  #eflaloNoTrip <- subset(eflalo, !FT_REF %in% sort(unique(tacsatTrip$FT_REF)))
  eflaloNoTrip <- eflalo[which(!eflalo$ID %in% eflaloTrip$ID),-match("ID",colnames(eflalo))]
  #eflaloVessel <- subset(eflaloNoTrip, VE_REF %in% sort(unique(tacsatTrip$VE_REF)))
  #eflaloNoVessel <- subset(eflaloNoTrip,!VE_REF %in% sort(unique(tacsatTrip$VE_REF)))
  eflaloVessel   <- eflaloNoTrip[which(paste(eflaloNoTrip$VE_REF,format(eflaloNoTrip$LE_CDATIM,"%Y")) %in% unique(paste(tacsatTrip$VE_REF,format(tacsatTrip$SI_DATIM,"%Y")))),]
  eflaloNoVessel <- eflaloNoTrip[which(!paste(eflaloNoTrip$VE_REF,format(eflaloNoTrip$LE_CDATIM,"%Y")) %in% unique(paste(tacsatTrip$VE_REF,format(tacsatTrip$SI_DATIM,"%Y")))),]

  #-------------------------------------------------------------------------------
  # 1a) Merge eflalo to tacsat with matching FT_REF
  #-------------------------------------------------------------------------------

  if(dim(tacsatTrip)[1]>0 & dim(eflaloTrip)[1] >0){
    if("day" %in% level){
      print("level: day")
      if(!"SI_YEAR" %in% colnames(tacsatTrip)) tacsatTrip$SI_YEAR <- an(format(tacsatTrip$SI_DATIM,format="%Y"))
      if(!"SI_DAY"  %in% colnames(tacsatTrip)) tacsatTrip$SI_DAY  <- an(format(tacsatTrip$SI_DATIM,format="%j"))
      if(!"LE_RECT" %in% colnames(tacsatTrip)) tacsatTrip$LE_RECT <- ICESrectangle(tacsatTrip)

      if(!"SI_YEAR" %in% colnames(eflaloTrip)) eflaloTrip$SI_YEAR <- an(format(eflaloTrip$LE_CDATIM,format="%Y"))
      if(!"SI_DAY"  %in% colnames(eflaloTrip)) eflaloTrip$SI_DAY  <- an(format(eflaloTrip$LE_CDATIM,format="%j"))

      #- Count pings in tacsat set
      nPings <- countPings(~year+VE_REF+FT_REF+icesrectangle+day,tacsatTrip,by=by)

      #- Do the merging of eflalo to tacsat
      res <- eflalo2Pings(eflaloTrip,tacsatTrip,nPings,c("SI_YEAR","VE_REF","FT_REF","LE_RECT","SI_DAY"),eflaloCol[c(kgs,eur)],remainTacsat,by=by)
      eflaloTrip   <- res[["eflalo"]]
      byDayTacsat  <- res[["tacsat"]]
      remainTacsat <- res[["remainTacsat"]]
    }
    if("ICESrectangle" %in% level){
      print("level: rectangle")
      if(!"SI_YEAR" %in% colnames(tacsatTrip)) tacsatTrip$SI_YEAR <- an(format(tacsatTrip$SI_DATIM,format="%Y"))
      if(!"LE_RECT" %in% colnames(tacsatTrip)) tacsatTrip$LE_RECT <- ICESrectangle(tacsatTrip)

      if(!"SI_YEAR" %in% colnames(eflaloTrip)) eflaloTrip$SI_YEAR <- an(format(eflaloTrip$LE_CDATIM,format="%Y"))

      #- Count pings in tacsat set
      nPings <- countPings(~year+VE_REF+FT_REF+icesrectangle,tacsatTrip,by=by)

      #- Do the merging of eflalo to tacsat
      res <- eflalo2Pings(eflaloTrip,tacsatTrip,nPings,c("SI_YEAR","VE_REF","FT_REF","LE_RECT"), eflaloCol[c(kgs,eur)],remainTacsat,by=by)
      eflaloTrip   <- res[["eflalo"]]
      byRectTacsat <- res[["tacsat"]]
      remainTacsat <- res[["remainTacsat"]]
    }
    if("trip" %in% level){
      print("level: trip")
      if(!"SI_YEAR" %in% colnames(tacsatTrip)) tacsatTrip$SI_YEAR <- an(format(tacsatTrip$SI_DATIM,format="%Y"))
      if(!"SI_YEAR" %in% colnames(eflaloTrip)) eflaloTrip$SI_YEAR <- an(format(eflaloTrip$LE_CDATIM,format="%Y"))

      #- Count pings in tacsat set
      nPings <- countPings(~year+VE_REF+FT_REF,tacsatTrip,by=by)

      #- Do the merging of eflalo to tacsat
      res <- eflalo2Pings(eflaloTrip,tacsatTrip,nPings,c("SI_YEAR","VE_REF","FT_REF"), eflaloCol[c(kgs,eur)],remainTacsat,by=by)
      eflaloTrip   <- res[["eflalo"]]
      byTripTacsat <- res[["tacsat"]]
      remainTacsat <- res[["remainTacsat"]]
    }
    #-------------------------------------------------------------------------------
    # 1b) Bind all tacsat files with matching FT_REF
    #-------------------------------------------------------------------------------

    if(length(remainTacsat) > 0) warning("Not all tacsat records with tripnumber have been merged!!")
    if(nrow(eflaloTrip) > 0) warning("Not all eflalo records with matching VMS tripnumber have been merged!!")
    if("day" %in% level){ tacsatFTREF <- rbind(byDayTacsat,byRectTacsat,byTripTacsat)
    } else {
        if("ICESrectangle" %in% level){ tacsatFTREF <- rbind(byRectTacsat,byTripTacsat)
        } else { tacsatFTREF <- byTripTacsat}}
    #- Each record carries a trip total; divide by number of pings, then
    #  aggregate the shares back per original tacsat record (ID.x)
    tacsatFTREF[,kgeur(colnames(tacsatFTREF))] <- sweep(tacsatFTREF[,kgeur(colnames(tacsatFTREF))],1,tacsatFTREF$pings,"/")
    tacsatFTREF$ID <- af(ac(tacsatFTREF$ID.x))
    DT  <- data.table(tacsatFTREF)
    eq1 <- c.listquote(paste("sum(",colnames(tacsatFTREF[,kgeur(colnames(tacsatFTREF))]),",na.rm=TRUE)",sep=""))
    tacsatFTREF <- DT[,eval(eq1),by=ID.x]; tacsatFTREF <- data.frame(tacsatFTREF); setnames(tacsatFTREF,colnames(tacsatFTREF),c("ID",colnames(eflaloTrip[,kgeur(colnames(eflaloTrip))])))
  }

  #-------------------------------------------------------------------------------
  # 2a) Merge eflalo to tacsat with no matching FT_REF
  #-------------------------------------------------------------------------------

  #- If you don't want to loose catch or value data, conserve the non-merged
  #  eflalo catches and distribute these over the tacsat records
  if(conserve == TRUE){
    if(dim(tacsat)[1]>0 & dim(eflaloVessel)[1] > 0){

      #-------------------------------------------------------------------------------
      # 2a-1) Merge eflalo to tacsat with matching VE_REF
      #-------------------------------------------------------------------------------

      if("day" %in% level){
        print("level: day & conserve = T, by vessel")
        if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))
        if(!"SI_DAY"  %in% colnames(tacsat)) tacsat$SI_DAY  <- an(format(tacsat$SI_DATIM,format="%j"))
        if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat)

        if(!"SI_YEAR" %in% colnames(eflaloVessel)) eflaloVessel$SI_YEAR <- an(format(eflaloVessel$LE_CDATIM,format="%Y"))
        if(!"SI_DAY"  %in% colnames(eflaloVessel)) eflaloVessel$SI_DAY  <- an(format(eflaloVessel$LE_CDATIM,format="%j"))

        #- Count pings in tacsat set
        nPings <- countPings(~year+VE_REF+icesrectangle+day,tacsat,by=by)

        #- Do the merging of eflalo to tacsat
        res <- eflalo2Pings(eflaloVessel,tacsat,nPings,c("SI_YEAR","VE_REF","LE_RECT","SI_DAY"), eflaloCol[c(kgs,eur)],NULL,by=by)
        eflaloVessel <- res[["eflalo"]]
        byDayTacsat  <- res[["tacsat"]]
      }

      if("ICESrectangle" %in% level){
        print("level: rectangle & conserve = T, by vessel")
        if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))
        if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat)

        if(!"SI_YEAR" %in% colnames(eflaloVessel)) eflaloVessel$SI_YEAR <- an(format(eflaloVessel$LE_CDATIM,format="%Y"))

        #- Count pings in tacsat set
        nPings <- countPings(~year+VE_REF+icesrectangle,tacsat,by=by)

        #- Do the merging of eflalo to tacsat
        res <- eflalo2Pings(eflaloVessel,tacsat,nPings,c("SI_YEAR","VE_REF","LE_RECT"), eflaloCol[c(kgs,eur)],NULL,by=by)
        eflaloVessel <- res[["eflalo"]]
        byRectTacsat <- res[["tacsat"]]
      }
      if(TRUE){ #-For remainder of vessel merging not at ICESrectangle level
        print("level: year & conserve = T, by vessel")
        if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))
        if(!"SI_YEAR" %in% colnames(eflaloVessel)) eflaloVessel$SI_YEAR <- an(format(eflaloVessel$LE_CDATIM,format="%Y"))

        #- Count pings in tacsat set
        nPings <- countPings(~year+VE_REF,tacsat,by=by)

        #- Do the merging of eflalo to tacsat
        res <- eflalo2Pings(eflaloVessel,tacsat,nPings,c("SI_YEAR","VE_REF" ), eflaloCol[c(kgs,eur)],NULL,by=by)
        eflaloVessel <- res[["eflalo"]]
        byVessTacsat <- res[["tacsat"]]
      }

      #-------------------------------------------------------------------------------
      # 2b-1) Bind all tacsat files with matching VE_REF
      #-------------------------------------------------------------------------------

      if("day" %in% level){ tacsatVEREF <- rbind(byDayTacsat,byRectTacsat,byVessTacsat)
      } else {
          if("ICESrectangle" %in% level){
            tacsatVEREF <- rbind(byRectTacsat,byVessTacsat)
          } else { tacsatVEREF <- byVessTacsat}}
      tacsatVEREF[,kgeur(colnames(tacsatVEREF))] <- sweep(tacsatVEREF[,kgeur(colnames(tacsatVEREF))],1,tacsatVEREF$pings,"/")
      tacsatVEREF$ID <- af(ac(tacsatVEREF$ID.x))
      DT  <- data.table(tacsatVEREF)
      eq1 <- c.listquote(paste("sum(",colnames(tacsatVEREF[,kgeur(colnames(tacsatVEREF))]),",na.rm=TRUE)",sep=""))
      tacsatVEREF <- DT[,eval(eq1),by=ID.x]; tacsatVEREF <- data.frame(tacsatVEREF); setnames(tacsatVEREF,colnames(tacsatVEREF),c("ID",colnames(eflaloVessel[,kgeur(colnames(eflaloVessel))])))
    }

    if(dim(tacsat)[1] > 0 & dim(eflaloNoVessel)[1] > 0){
      #-------------------------------------------------------------------------------
      # 2a-2) Merge eflalo to tacsat with no matching FT_REF or VE_REF
      #-------------------------------------------------------------------------------
      if("day" %in% level){
        print("level: day & conserve = T, no vessel match")
        if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))
        if(!"SI_DAY"  %in% colnames(tacsat)) tacsat$SI_DAY  <- an(format(tacsat$SI_DATIM,format="%j"))
        if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat)

        if(!"SI_YEAR" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_YEAR <- an(format(eflaloNoVessel$LE_CDATIM,format="%Y"))
        if(!"SI_DAY"  %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_DAY  <- an(format(eflaloNoVessel$LE_CDATIM,format="%j"))

        #- Count pings in tacsat set
        nPings <- countPings(~year+icesrectangle+day,tacsat,by=by)

        #- Do the merging of eflalo to tacsat
        res <- eflalo2Pings(eflaloNoVessel,tacsat,nPings,c("SI_YEAR","LE_RECT","SI_DAY"), eflaloCol[c(kgs,eur)],NULL,by=by)
        eflaloNoVessel <- res[["eflalo"]]
        byDayTacsat    <- res[["tacsat"]]
      }

      if("ICESrectangle" %in% level){
        print("level: rectangle & conserve = T, no vessel match")
        if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))
        if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat)

        if(!"SI_YEAR" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_YEAR <- an(format(eflaloNoVessel$LE_CDATIM,format="%Y"))

        #- Count pings in tacsat set
        nPings <- countPings(~year+icesrectangle,tacsat,by=by)

        #- Do the merging of eflalo to tacsat
        res <- eflalo2Pings(eflaloNoVessel,tacsat,nPings,c("SI_YEAR","LE_RECT"), eflaloCol[c(kgs,eur)],NULL,by=by)
        eflaloNoVessel <- res[["eflalo"]]
        byRectTacsat   <- res[["tacsat"]]
      }
      if(TRUE){ #-For remainder of merging not at ICESrectangle level
        print("level: year & conserve = T, no vessel match")
        if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))
        if(!"SI_YEAR" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_YEAR <- an(format(eflaloNoVessel$LE_CDATIM,format="%Y"))

        #- Count pings in tacsat set
        nPings <- countPings(~year,tacsat,by=by)

        #- Do the merging of eflalo to tacsat
        res <- eflalo2Pings(eflaloNoVessel,tacsat,nPings,c("SI_YEAR"), eflaloCol[c(kgs,eur)],NULL,by=by)
        eflaloNoVessel <- res[["eflalo"]]
        byVessTacsat   <- res[["tacsat"]]
      }
      #-------------------------------------------------------------------------------
      # 2b-2) Bind all tacsat files with no matching FT_REF or VE_REF
      #-------------------------------------------------------------------------------

      if("day" %in% level){ tacsatREF <- rbind(byDayTacsat,byRectTacsat,byVessTacsat)
      } else {
          if("ICESrectangle" %in% level){ tacsatREF <- rbind(byRectTacsat,byVessTacsat)
          } else { tacsatREF <- byVessTacsat}}
      tacsatREF[,kgeur(colnames(tacsatREF))] <- sweep(tacsatREF[,kgeur(colnames(tacsatREF))],1,tacsatREF$pings,"/")
      tacsatREF$ID <- af(ac(tacsatREF$ID.x))
      DT  <- data.table(tacsatREF)
      eq1 <- c.listquote(paste("sum(",colnames(tacsatREF[,kgeur(colnames(tacsatREF))]),",na.rm=TRUE)",sep=""))
      #- NOTE(review): the replacement names below are taken from eflaloVessel,
      #  not eflaloNoVessel; the kg/euro column names of both derive from the
      #  same eflalo so this appears harmless, but confirm it is intentional.
      tacsatREF <- DT[,eval(eq1),by=ID.x]; tacsatREF <- data.frame(tacsatREF); setnames(tacsatREF,colnames(tacsatREF),c("ID",colnames(eflaloVessel[,kgeur(colnames(eflaloVessel))])))
    }
  }#End conserve

  #-------------------------------------------------------------------------------
  # 3) Merge all tacsat files together and return
  #-------------------------------------------------------------------------------

  if(conserve==TRUE){
    #- Any of the three partial results may not exist (empty subsets above)
    if(exists("tacsatFTREF")){one   <- tacsatFTREF} else{ one   <- numeric()}
    if(exists("tacsatVEREF")){two   <- tacsatVEREF} else{ two   <- numeric()}
    if(exists("tacsatREF"))  {three <- tacsatREF}   else{ three <- numeric()}
    tacsatTot <- rbind(one,two,three)
    DT  <- data.table(tacsatTot)
    eq1 <- c.listquote(paste("sum(",colnames(tacsatTot[,kgeur(colnames(tacsatTot))]),",na.rm=TRUE)",sep=""))
    tacsatTot <- DT[,eval(eq1),by=ID]; tacsatTot <- data.frame(tacsatTot); setnames(tacsatTot,colnames(tacsatTot),c("ID",colnames(eflalo[,kgeur(colnames(eflalo))])))
    tacsatReturn <- merge(tacsat,tacsatTot,by="ID",all.x=TRUE)
    if(variable == "value") tacsatReturn <- tacsatReturn[,c(1:dim(tacsat)[2],grep("EURO",colnames(tacsatReturn)))]
    if(variable == "kgs")   tacsatReturn <- tacsatReturn[,c(1:dim(tacsat)[2],grep("KG",colnames(tacsatReturn)))]
    if(variable == "all")   tacsatReturn <- tacsatReturn
  } else {
    if(exists("tacsatFTREF")==FALSE){stop("You have selected not to conserve catches, but there is no trip identifier in the tacsat file")}
    tacsatReturn <- merge(tacsat,tacsatFTREF,by="ID",all.x=TRUE)
    if(variable == "value") tacsatReturn <- tacsatReturn[,c(1:dim(tacsat)[2],grep("EURO",colnames(tacsatReturn)))]
    if(variable == "kgs")   tacsatReturn <- tacsatReturn[,c(1:dim(tacsat)[2],grep("KG",colnames(tacsatReturn)))]
    if(variable == "all")   tacsatReturn <- tacsatReturn
  }
  #- Optionally put the non-fishing pings back before returning
  if(returnAll & nrow(remtacsat)>0)
    tacsatReturn <- orderBy(~ID,data=rbindTacsat(tacsatReturn,remtacsat))

  return(orderBy(~ID,data=tacsatReturn)[,-match("ID",colnames(tacsatReturn))])}
#' Summarize primary aspects of tacsat file
#'
#' Gives a quick overview of a tacsat dataset: number of countries and
#' vessels, spatial extent, temporal extent, heading and speed ranges and the
#' total effort in hours.
#'
#' @param tacsat tacsat dataframe
#' @author Niels T. Hintzen
#' @seealso \code{\link{summarizeEflalo}},\code{\link{summary}}
#' @examples
#'
#' data(tacsat)
#'
#' summarizeTacsat(tacsat)
#'
#' @export summarizeTacsat
summarizeTacsat <- function(tacsat){

  countryCount <- length(unique(tacsat$VE_COU))
  vesselCount  <- length(unique(tacsat$VE_REF))

  # Ensure a POSIXct time stamp column is available
  if (!"SI_DATIM" %in% colnames(tacsat)) {
    tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME,
                                  sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M")
  }

  # Effort per trip when trip identifiers are present, else per vessel
  intvLevel   <- if ("FT_REF" %in% colnames(tacsat)) "trip" else "vessel"
  effortHours <- sum(intervalTacsat(tacsat, level = intvLevel, fill.na = TRUE)$INTV, na.rm = TRUE) / 60

  lonRange  <- round(range(tacsat$SI_LONG, na.rm = TRUE), 3)
  latRange  <- round(range(tacsat$SI_LATI, na.rm = TRUE), 3)
  timeRange <- range(tacsat$SI_DATIM, na.rm = TRUE)
  headRange <- range(tacsat$SI_HE, na.rm = TRUE)
  spdRange  <- range(tacsat$SI_SP, na.rm = TRUE)

  summaryTab <- cbind(
    desc  = c("nrCountries","nrVessels","minLon","maxLon","minLat","maxLat",
              "minTime","maxTime","minHeading","maxHeading","minSpeed","maxSpeed","effort(hr)"),
    value = c(countryCount, vesselCount, lonRange, latRange,
              ac(timeRange[1]), ac(timeRange[2]),
              headRange, spdRange, round(effortHours, 1)))
  return(as.data.frame(summaryTab, stringsAsFactors = FALSE))}
Hintzen +#' @seealso \code{\link{summarizeTacsat}},\code{\link{summary}} +#' @examples +#' +#' data(eflalo) +#' +#' summarizeEflalo(eflalo) +#' +#' @export summarizeEflalo +summarizeEflalo <- function(eflalo){ + + nrVessels <- length(unique(eflalo$VE_REF)) + nrCountries <- length(unique(eflalo$VE_COU)) + + if (!"FT_DDATIM" %in% colnames(eflalo)) + eflalo$FT_DDATIM <- as.POSIXct(paste(eflalo$FT_DDAT, eflalo$FT_DTIME, + sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") + if (!"FT_LDATIM" %in% colnames(eflalo)) + eflalo$FT_LDATIM <- as.POSIXct(paste(eflalo$FT_LDAT, eflalo$FT_LTIME, + sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") + + totalEffort <- sum(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="hours"),na.rm=TRUE) + totalKW <- sum(eflalo$VE_KW,na.rm=TRUE) + meanLength <- mean(eflalo$VE_LEN,na.rm=TRUE) + gears <- names(rev(table(eflalo$LE_GEAR)))[1:3] + catchvals <- colSums(eflalo[,kgeur(colnames(eflalo))],na.rm=TRUE) + catches <- catchvals[grep("LE_KG_",names(catchvals))] + values <- catchvals[grep("LE_EURO_",names(catchvals))] + + return(data.frame( + cbind(desc= c("nrCountries","nrVessels","minTime","maxTime","effort(hr)","totalKW","meanLength", + "gear1","gear2","gear3","catch1","catch2","catch3","value1","value2","value3"), + value=c(nrCountries,nrVessels,ac(range(eflalo$FT_DDATIM)[1]),ac(range(eflalo$FT_LDATIM)[2]), + round(totalEffort,1),round(totalKW,1),round(meanLength,1),gears, + paste(names(rev(sort(catches)))[1:3],round(rev(sort(catches))[1:3],0)), + paste(names(rev(sort(values )))[1:3],round(rev(sort(values ))[1:3],0)))), + stringsAsFactors=FALSE))} + + + + + + + + + diff --git a/vmstools/R/surface.r b/vmstools/R/surface.r index 2674815..69751b2 100644 --- a/vmstools/R/surface.r +++ b/vmstools/R/surface.r @@ -1,100 +1,148 @@ -surface <- function(obj,method="Trapezoid",includeNA=TRUE,zone=NULL){ #Methods can be "Trapezoid" and "UTM" - require(sp) - require(PBSmapping) - if(!class(obj) %in% c('SpatialGridDataFrame','SpatialPolygons')) - 
#' Calculate surface from grid cells or polygons
#'
#' Calculate the surface in km2 of the grid / polygons that has been used
#'
#' Method UTM might take longer due to way of calculation, but is more precise
#' than the Trapezoid function, especially when larger gridcells are used.
#'
#' @param obj defined SpatialGridDataFrame or SpatialPolygons object (see 'sp'
#'   package)
#' @param method Method to be used to calculate surface, either Trapezoid or
#'   UTM
#' @param includeNA Whether to include cells which do not hold any data
#' @param zone Include UTM zone notation (or detected automatically when NULL)
#' @return If obj is a SpatialGridDataFrame an additional column named
#'   'cellArea' is returned which holds the km2 area of the grid cell. If obj is
#'   a SpatialPolygons, each polygon area slot is filled with the area in km2.
#' @author Niels T. Hintzen
#' @seealso \code{\link{createGrid}}
#' @examples
#'
#' data(tacsat)
#'
#' #Sort the Tacsat data
#' tacsat <- sortTacsat(tacsat)
#' tacsat <- tacsat[1:1000,]
#'
#' #Get the ranges of the tacsat data
#' xrange <- range(tacsat$SI_LONG,na.rm=TRUE);
#' xrange <- c(min(xrange) - min(xrange)*0.05,
#'             max(xrange) + max(xrange)*0.05)
#' yrange <- range(tacsat$SI_LATI,na.rm=TRUE);
#' yrange <- c(min(yrange) - min(yrange)*0.05,
#'             max(yrange) + max(yrange)*0.05)
#' #Setup a grid
#' sPDF <- createGrid(xrange,yrange,resx=0.1,resy=0.05,type="SpatialGridDataFrame")
#'
#' #Setup a polygon
#' sP <- lonLat2SpatialPolygons(lst=list(data.frame(
#'         SI_LONG=c(4,4.5,4.7,4),
#'         SI_LATI=c(54,54,55.5,55.7))))
#'
#' #Calculate the cell surface
#' result <- surface(sPDF,method="Trapezoid",includeNA=TRUE)
#' print(head(result@data))
#' result <- surface(sP,zone=31)
#' print(result@polygons[[1]]@Polygons[[1]]@area)
#'
#' @export surface
surface <- function(obj,method="Trapezoid",includeNA=TRUE,zone=NULL){ #Methods can be "Trapezoid" and "UTM"
  require(sp)
  require(PBSmapping)
  if(!class(obj) %in% c('SpatialGridDataFrame','SpatialPolygons'))
    stop("class of obj should be SpatialGridDataFrame or SpatialPolygons")

  if(class(obj) == 'SpatialPolygons')
  {
    #- Flatten every sub-polygon into one PolySet-style table (PID/POS/X/Y)
    #  so PBSmapping::calcArea can compute all areas in one call
    allSourcePoly <- numeric()
    counter       <- 0
    for(iPol1 in 1:length(obj@polygons)){
      for(iPol2 in 1:length(obj@polygons[[iPol1]])){
        counter    <- counter + 1
        sourcePoly <- data.frame(cbind(1,1:nrow(obj@polygons[[iPol1]]@Polygons[[iPol2]]@coords),
                                 obj@polygons[[iPol1]]@Polygons[[iPol2]]@coords[,1],obj@polygons[[iPol1]]@Polygons[[iPol2]]@coords[,2]))
        rownames(sourcePoly)<-1:nrow(sourcePoly)
        colnames(sourcePoly)<-c("PID","POS","X","Y")
        sourcePoly$PID[] <-counter

        allSourcePoly <- rbind(allSourcePoly,sourcePoly)
      }
    }
    areas <- calcArea(as.PolySet(allSourcePoly, projection="LL",zone=zone))$area
    #- Pair each computed area with its polygon label point and ID so it can
    #  be matched back to the right sub-polygon below
    areas <- data.frame(
              cbind(areas,do.call(rbind,lapply(obj@polygons,function(x){return(x@labpt)})),
                    do.call(rbind,lapply(obj@polygons,function(x){return(x@ID)}))),stringsAsFactors=FALSE)
    colnames(areas) <- c("areas","labptx","labpty","ID")

    #- Rebuild the SpatialPolygons with each @area slot filled in km2
    obj <- SpatialPolygons(lapply(obj@polygons,function(x){
             res <- lapply( x@Polygons,function(y){
                      subAreas <- subset(areas,ID == x@ID & labptx == ac(y@labpt[1]) & labpty == ac(y@labpt[2]))
                      y@area <- anf(subAreas$areas);return(y)});
             return(Polygons(res,ID=x@ID))}))

  }
  if(!method %in% c("Trapezoid","UTM")) stop("method not available")
  if (class(obj) %in% c('SpatialGridDataFrame')) # not empty...
  {
    if(method == "Trapezoid"){
      #- Approximate each cell as a trapezoid: cell width (in km) changes with
      #  latitude, so sample 'res' latitudes per cell row and integrate
      res <- ceiling(max(obj@grid@cellsize,na.rm=TRUE)/0.1 * 10) #automatic scaling
      if(res < 3) res <- 3
      griddims <- summary(obj)$grid
      bboxdims <- bbox(obj)
      stlon    <- bboxdims[1,1]
      stlat    <- bboxdims[2,1]
      enlon    <- bboxdims[1,2]
      enlat    <- bboxdims[2,2]
      sizelon  <- griddims[1,2]
      sizelat  <- griddims[2,2]

      lons <- seq(stlon,enlon,sizelon)
      lats <- seq(stlat,enlat,sizelat)

      #- Height (km) of one latitude sub-step; base widths sampled per sub-step
      heights <- distance(lon=0,lat=stlat,lonRef=0,latRef=stlat+sizelat/(res-1))
      seqlats <- mapply(seq,lats[1:(length(lats)-1)],lats[2:length(lats)],length.out=res)

      base <- matrix(mapply(distance,lon=0,lat=c(seqlats),lonRef=sizelon,latRef=c(seqlats)),ncol=res,byrow=TRUE)
      if(dim(base)[1] == 1){
        base1   <- base[1:(res-1)]
        base2   <- base[2:res]
        surface <- rep(sum(heights * (base1 + base2)/2),each=length(seq(stlon,enlon-sizelon,sizelon)))
      } else {
        base1   <- base[,1:(res-1)]
        base2   <- base[,2:res]
        surface <- rep(apply(heights * (base1 + base2) / 2,1,sum),each=length(seq(stlon,enlon-sizelon,sizelon)))
      }

      #- Grid cells are stored top-to-bottom, surfaces were built bottom-to-top
      obj@data$cellArea <- rev(surface)
    }
    if(method == "UTM"){
      require(PBSmapping)
      griddims <- summary(obj)$grid
      sizelon  <- griddims[1,2]
      sizelat  <- griddims[2,2]

      ltCentreCell<-coordinates(obj)

      #- Build the 4-corner polygon of each cell and let calcArea do the work
      for (x in 1:(length(ltCentreCell)/2)){
        if (includeNA) { # speed up the calculation by dropping cells with fishing=NA /!| only work for DCF5 and DCF6!
          minX<-ltCentreCell[x,1]-sizelon/2
          maxX<-ltCentreCell[x,1]+sizelon/2
          minY<-ltCentreCell[x,2]-sizelat/2
          maxY<-ltCentreCell[x,2]+sizelat/2
          ltX<-c(minX,minX,maxX,maxX)
          ltY<-c(minY,maxY,maxY,minY)
          sourcePoly<-cbind(rep(1,4),seq(1,4),ltX,ltY)
          rownames(sourcePoly)<-seq(1,4)
          colnames(sourcePoly)<-c("PID","POS","X","Y")

          polyArea<-calcArea(as.PolySet(sourcePoly, projection="LL",zone=zone))
          singleCellArea<-polyArea$area
        } else {singleCellArea<-NA}
        obj@data$cellArea[x]<-singleCellArea
      }
    }
  }
  return(obj)}
+#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export table_variables +table_variables=function(data){ + n=nrow(data) + res1=t(as.matrix(data[1:round(n/2),])) + res2=t(as.matrix(data[(round(n/2)+1):n,])) + res=cbind(res1,res2) + row.names(res)=colnames(data) + colnames(res)=row.names(data) + return(res) +} diff --git a/vmstools/R/tacsatMCP.r b/vmstools/R/tacsatMCP.r index 55f06c1..6ee19e7 100644 --- a/vmstools/R/tacsatMCP.r +++ b/vmstools/R/tacsatMCP.r @@ -1,19 +1,53 @@ -## tacsatMinimumConvexPolygon.r -## by Fabrizio Manco, 22/09/2010 -## Flag vms pings inside a convex polygon regrouping a threshold percentage of total points (default 90%, for indicator DCF6) - -tacsatMCP <- function (tacsat, pctThreshold=90) -{ - require(aspace) - - vmsPoints<-cbind(tacsat$SI_LONG, tacsat$SI_LATI) - - vmsMCP<-calc_mcp(id=1, points = vmsPoints, filename="", verbose = FALSE, pct = pctThreshold) - - MCPolygon<-as.data.frame(vmsMCP$MCP.coords) - pointInOut<-point.in.polygon(vmsPoints[,1], vmsPoints[,2], MCPolygon[,2], MCPolygon[,3]) - - tacsat$INMCP<-pointInOut - - return(tacsat) 
-} \ No newline at end of file +## tacsatMinimumConvexPolygon.r +## by Fabrizio Manco, 22/09/2010 +## Flag vms pings inside a convex polygon regrouping a threshold percentage of total points (default 90%, for indicator DCF6) + + + +#' Flag the tacsat ping inside a Minimum Convex Polygon +#' +#' This function will flag the tacsat pings that are located inside a minimum +#' convex polygon which contains a percentage of the whole number of pings +#' +#' This function is based on the calc_mcp function of the aspace package. The +#' plot_mcp function allows to plot the polygon. It is used in the DCF +#' Indicator 6 as a filtering of aggregated fisheries. +#' +#' @param tacsat tacsat data frame +#' @param pctThreshold Percentage of pings to be included into the MCP +#' @return The tacsat is returned with attached a column called INMCP which +#' flag the pings inside the MCP. The nodes of the MCP are also returned. +#' @author Fabrizio Manco +#' @seealso \code{\link{indicators}} +#' @references EU lot 2 project +#' @examples +#' +#' require(PBSmapping) +#' data(tacsat) +#' +#' # Flag the pings inside a polygon gathering 90% of the pings +#' tacsat<-tacsatMCP(tacsat, pctThreshold=90) +#' # Filter the tacsat data to remove the points outside the MCP +#' tacsat<-subset(tacsat, tacsat$INMCP!=0) +#' # Grid the filtered vms points +#' vmsgrid<-vmsGridCreate(tacsat, nameLon = "SI_LONG", nameLat = "SI_LATI", +#' cellsizeX=0.05, cellsizeY=0.05, plotMap=TRUE) +#' # Add the MCP to the map +#' plot_mcp(plotnew=FALSE, plotpoints=FALSE, titletxt="") +#' +#' @export tacsatMCP +tacsatMCP <- function (tacsat, pctThreshold=90) +{ + require(aspace) + + vmsPoints<-cbind(tacsat$SI_LONG, tacsat$SI_LATI) + + vmsMCP<-calc_mcp(id=1, points = vmsPoints, filename="", verbose = FALSE, pct = pctThreshold) + + MCPolygon<-as.data.frame(vmsMCP$MCP.coords) + pointInOut<-point.in.polygon(vmsPoints[,1], vmsPoints[,2], MCPolygon[,2], MCPolygon[,3]) + + tacsat$INMCP<-pointInOut + + return(tacsat) +} diff 
--git a/vmstools/R/tacsatMinimumConvexPolygon.r b/vmstools/R/tacsatMinimumConvexPolygon.r index 42f8d67..cd2a70c 100644 --- a/vmstools/R/tacsatMinimumConvexPolygon.r +++ b/vmstools/R/tacsatMinimumConvexPolygon.r @@ -1,21 +1,43 @@ -## tacsatMinimumConvexPolygon.r -## by Fabrizio Manco, 22/09/2010 -## Flag vms pings inside a convex polygon regrouping a threshold percentage of total points (default 90%, for indicator DCF6) - -tacsatMinimumConvexPolygon <- function (tacsat, - pctThreshold=90) -{ - require(aspace) - require(ade4) - - vmsPoints<-cbind(tacsat$SI_LONG, tacsat$SI_LATI) - - vmsMCP<-calc_mcp(id=1, points = vmsPoints, filename="", verbose = FALSE, pct = pctThreshold) - - MCPolygon<-as.data.frame(vmsMCP$MCP.coords) - pointInOut<-point.in.polygon(vmsPoints[,1], vmsPoints[,2], MCPolygon[,2], MCPolygon[,3]) - - tacsat$INMCP<-pointInOut - - return(tacsat) -} \ No newline at end of file +## tacsatMinimumConvexPolygon.r +## by Fabrizio Manco, 22/09/2010 +## Flag vms pings inside a convex polygon regrouping a threshold percentage of total points (default 90%, for indicator DCF6) + + + +#' Flag tacsat records that are within the convex polygon +#' +#' Flag tacsat records that are within the convex polygon with a predefined +#' threshold +#' +#' See point.in.polygon function for returned value details +#' +#' @param tacsat Tacsat dataframe +#' @param pctThreshold Threshold of points to consider. Between 0 and 100. 
+#' @author Fabrizio Manco +#' @references EU Lot 2 project +#' @examples +#' +#' require(adehabitat) +#' require(ade4) +#' data(tacsat) +#' tacsat <- tacsat[1:100,] +#' tacsatMinimumConvexPolygon(tacsat,95) +#' +#' @export tacsatMinimumConvexPolygon +tacsatMinimumConvexPolygon <- function (tacsat, + pctThreshold=90) +{ + require(aspace) + require(ade4) + + vmsPoints<-cbind(tacsat$SI_LONG, tacsat$SI_LATI) + + vmsMCP<-calc_mcp(id=1, points = vmsPoints, filename="", verbose = FALSE, pct = pctThreshold) + + MCPolygon<-as.data.frame(vmsMCP$MCP.coords) + pointInOut<-point.in.polygon(vmsPoints[,1], vmsPoints[,2], MCPolygon[,2], MCPolygon[,3]) + + tacsat$INMCP<-pointInOut + + return(tacsat) +} diff --git a/vmstools/R/targetspecies.r b/vmstools/R/targetspecies.r index 3f97858..c9c50ae 100644 --- a/vmstools/R/targetspecies.r +++ b/vmstools/R/targetspecies.r @@ -1,34 +1,62 @@ -############################################################# -# Determine the species with a test-value > 1.96 by cluster # -############################################################# - -targetspecies=function(resval){ - p=nrow(resval) - nbgp=ncol(resval) - - tabnumespcib=data.frame() - tabnomespcib=data.frame() - - for(i in 1:nbgp){ - # qnorm(0.975,mean=0,sd=1)=1.96 (P(resval>1.96)=0.025) - numespcib=which(resval[,i]>1.96) - numespcibdec=numespcib[order(resval[numespcib,i],decreasing=TRUE)] - nomespcib=names(numespcibdec) - - nbespgpcib=length(numespcib) - - if(nbespgpcib>0){ - for (j in 1:nbespgpcib){ - tabnumespcib[i,j]=numespcibdec[j] - tabnomespcib[i,j]=nomespcib[j] - } - }else{ - tabnumespcib[i,]=NA - tabnomespcib[i,]=NA - } - } - tabnumespcib=as.matrix(tabnumespcib) - tabnomespcib=as.matrix(tabnomespcib) - return(list(tabnumespcib=tabnumespcib,tabnomespcib=tabnomespcib)) -} - +############################################################# +# Determine the species with a test-value > 1.96 by cluster # +############################################################# + + + +#' Useful functions for 
the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. +#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export targetspecies +targetspecies=function(resval){ + p=nrow(resval) + nbgp=ncol(resval) + + tabnumespcib=data.frame() + tabnomespcib=data.frame() + + for(i in 1:nbgp){ + # qnorm(0.975,mean=0,sd=1)=1.96 (P(resval>1.96)=0.025) + numespcib=which(resval[,i]>1.96) + numespcibdec=numespcib[order(resval[numespcib,i],decreasing=TRUE)] + nomespcib=names(numespcibdec) + + nbespgpcib=length(numespcib) + + if(nbespgpcib>0){ + for (j in 1:nbespgpcib){ + tabnumespcib[i,j]=numespcibdec[j] + tabnomespcib[i,j]=nomespcib[j] + } + }else{ + tabnumespcib[i,]=NA + tabnomespcib[i,]=NA + } + } + tabnumespcib=as.matrix(tabnumespcib) + tabnomespcib=as.matrix(tabnomespcib) + return(list(tabnumespcib=tabnumespcib,tabnomespcib=tabnomespcib)) +} + diff --git a/vmstools/R/tclWindows.r b/vmstools/R/tclWindows.r index 6a31695..c6b00bd 100644 --- a/vmstools/R/tclWindows.r +++ b/vmstools/R/tclWindows.r @@ -1,65 +1,75 @@ -callNumberPeak <- function(){ -tt <- tktoplevel() -peaks <- tclVar(5) - -f1 <- tkframe(tt) -tkpack(f1, side='top') -tkpack(tklabel(f1, text='peaks: '), side='left') -tkpack(tkentry(f1, textvariable=peaks), side='left') - -tkpack(tkbutton(tt, text='Next', command=function() tkdestroy(tt)), - side='right', anchor='s') - -tkwait.window(tt) -return(as.numeric(tclvalue(peaks)))} - - - - -callPeakValue <- function(pks){ - -#-Put default values for peaks -for(iPeaks in 1:pks){ - if(iPeaks == 1) entry1 <- tclVar("-10") - if(iPeaks == 2) entry2 <- tclVar("-5") - if(iPeaks == 3) entry3 <- tclVar("0") - if(iPeaks == 4) entry4 <- tclVar("5") - if(iPeaks == 5) entry5 <- tclVar("10") -} - -#-Create input window -tt <- tktoplevel() -tkwm.title(tt,"Value of peaks") -for(iPeaks in 1:pks){ - if(iPeaks==1) box1 <- tkentry(tt, textvariable=entry1) - if(iPeaks==2) box2 <- tkentry(tt, textvariable=entry2) - if(iPeaks==3) box3 <- tkentry(tt, textvariable=entry3) - if(iPeaks==4) box4 <- tkentry(tt, textvariable=entry4) - 
if(iPeaks==5) box5 <- tkentry(tt, textvariable=entry5) -} -#-Create input rows -tkgrid(tklabel(tt,text="value of peaks"),columnspan=pks) -for(iPeaks in 1:pks){ - if(iPeaks==1) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box1) - if(iPeaks==2) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box2) - if(iPeaks==3) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box3) - if(iPeaks==4) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box4) - if(iPeaks==5) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box5) -} - -done <- tclVar(0) -eqvar <- tclVar(0) - -#-Create submit button -submit.but <- tkbutton(tt, text="submit",command=function()tclvalue(done)<-1) - -tkgrid(submit.but) -tkbind(tt, "", function()tclvalue(done)<-2) -tkwait.variable(done) - -if(tclvalue(done)=="2") stop("aborted") -tkdestroy(tt) -valPeaks <- numeric() -for(iPks in 1:pks) valPeaks <- paste(valPeaks,tclvalue(get(paste("entry",iPks,sep="")))) -return(valPeaks)} - +callNumberPeak <- function(){ +tt <- tktoplevel() +peaks <- tclVar(5) + +f1 <- tkframe(tt) +tkpack(f1, side='top') +tkpack(tklabel(f1, text='peaks: '), side='left') +tkpack(tkentry(f1, textvariable=peaks), side='left') + +tkpack(tkbutton(tt, text='Next', command=function() tkdestroy(tt)), + side='right', anchor='s') + +tkwait.window(tt) +return(as.numeric(tclvalue(peaks)))} + + + + + + +#' Internal function to retrieve information on number of peaks +#' +#' Internal function used together with analyseTacsatActivity. +#' +#' +#' @param pks Number of peaks spotted +#' @author Niels T.
Hintzen +#' @export callPeakValue +callPeakValue <- function(pks){ + +#-Put default values for peaks +for(iPeaks in 1:pks){ + if(iPeaks == 1) entry1 <- tclVar("-10") + if(iPeaks == 2) entry2 <- tclVar("-5") + if(iPeaks == 3) entry3 <- tclVar("0") + if(iPeaks == 4) entry4 <- tclVar("5") + if(iPeaks == 5) entry5 <- tclVar("10") +} + +#-Create input window +tt <- tktoplevel() +tkwm.title(tt,"Value of peaks") +for(iPeaks in 1:pks){ + if(iPeaks==1) box1 <- tkentry(tt, textvariable=entry1) + if(iPeaks==2) box2 <- tkentry(tt, textvariable=entry2) + if(iPeaks==3) box3 <- tkentry(tt, textvariable=entry3) + if(iPeaks==4) box4 <- tkentry(tt, textvariable=entry4) + if(iPeaks==5) box5 <- tkentry(tt, textvariable=entry5) +} +#-Create input rows +tkgrid(tklabel(tt,text="value of peaks"),columnspan=pks) +for(iPeaks in 1:pks){ + if(iPeaks==1) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box1) + if(iPeaks==2) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box2) + if(iPeaks==3) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box3) + if(iPeaks==4) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box4) + if(iPeaks==5) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box5) +} + +done <- tclVar(0) +eqvar <- tclVar(0) + +#-Create submit button +submit.but <- tkbutton(tt, text="submit",command=function()tclvalue(done)<-1) + +tkgrid(submit.but) +tkbind(tt, "", function()tclvalue(done)<-2) +tkwait.variable(done) + +if(tclvalue(done)=="2") stop("aborted") +tkdestroy(tt) +valPeaks <- numeric() +for(iPks in 1:pks) valPeaks <- paste(valPeaks,tclvalue(get(paste("entry",iPks,sep="")))) +return(valPeaks)} + diff --git a/vmstools/R/test.values.r b/vmstools/R/test.values.r index 5b2aed3..1a3f073 100644 --- a/vmstools/R/test.values.r +++ b/vmstools/R/test.values.r @@ -1,48 +1,76 @@ -###################################################### -# Compute the test-value for each species by cluster # -###################################################### - -test.values=function(groupes,data){ - - n=nrow(data) - 
p=ncol(data) - noms_var=colnames(data) - nb_groupes=length(levels(as.factor(groupes))) - noms_groupes=character(nb_groupes) - - stats_globales=matrix(0,nrow=p,ncol=2) - row.names(stats_globales)=noms_var - colnames(stats_globales)=c("mean","variance") - for (i in 1:p){ - stats_globales[i,1]=mean(data[,noms_var[i]]) - stats_globales[i,2]=var(data[,noms_var[i]]) - } - - res=matrix(0,nrow=p,ncol=nb_groupes) - row.names(res)=noms_var - - for (j in 1:nb_groupes){ - groupe=which(groupes==j) - n_k=length(groupe) - - for (i in 1:p){ - mu_k=mean(data[groupe,noms_var[i]]) - mu=stats_globales[noms_var[i],"mean"] - V=stats_globales[noms_var[i],"variance"] - V_mu_k=(n-n_k)*V/(n_k*(n-1)) - - if(V_mu_k==0){ - Valeur_test=0 - }else{ - Valeur_test=(mu_k-mu)/sqrt(V_mu_k) - } - - res[i,j]=Valeur_test - rm(Valeur_test) - } - rm(groupe) - noms_groupes[j]=paste("Cluster",j,sep=" ") - } - colnames(res)=noms_groupes - return(res) -} +###################################################### +# Compute the test-value for each species by cluster # +###################################################### + + + +#' Useful functions for the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. 
+#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export test.values +test.values=function(groupes,data){ + + n=nrow(data) + p=ncol(data) + noms_var=colnames(data) + nb_groupes=length(levels(as.factor(groupes))) + noms_groupes=character(nb_groupes) + + stats_globales=matrix(0,nrow=p,ncol=2) + row.names(stats_globales)=noms_var + colnames(stats_globales)=c("mean","variance") + for (i in 1:p){ + stats_globales[i,1]=mean(data[,noms_var[i]]) + stats_globales[i,2]=var(data[,noms_var[i]]) + } + + res=matrix(0,nrow=p,ncol=nb_groupes) + row.names(res)=noms_var + + for (j in 1:nb_groupes){ + groupe=which(groupes==j) + n_k=length(groupe) + + for (i in 1:p){ + mu_k=mean(data[groupe,noms_var[i]]) + mu=stats_globales[noms_var[i],"mean"] + V=stats_globales[noms_var[i],"variance"] + V_mu_k=(n-n_k)*V/(n_k*(n-1)) + + if(V_mu_k==0){ + Valeur_test=0 + }else{ + Valeur_test=(mu_k-mu)/sqrt(V_mu_k) + } + + res[i,j]=Valeur_test + rm(Valeur_test) + } + rm(groupe) + noms_groupes[j]=paste("Cluster",j,sep=" ") + } + colnames(res)=noms_groupes + return(res) +} diff --git 
a/vmstools/R/transformation_proportion.r b/vmstools/R/transformation_proportion.r index bdf366f..6502af1 100644 --- a/vmstools/R/transformation_proportion.r +++ b/vmstools/R/transformation_proportion.r @@ -1,19 +1,47 @@ -################################################################# -# Transform quantities to percentage values (between 0 and 100) # -# of each species in the logevent total catch # -################################################################# - -transformation_proportion=function(tab){ - res=as.matrix(tab) - n=nrow(tab) - p=ncol(tab) - for (i in 1:n){ - sommeligne=sum(res[i,], na.rm=TRUE) - if(sommeligne==0){ - res[i,]=rep(0,p) - }else{ - res[i,]=res[i,]*(100/sommeligne) - } - } - return(res) -} \ No newline at end of file +################################################################# +# Transform quantities to percentage values (between 0 and 100) # +# of each species in the logevent total catch # +################################################################# + + + +#' Useful functions for the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. 
+#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export transformation_proportion +transformation_proportion=function(tab){ + res=as.matrix(tab) + n=nrow(tab) + p=ncol(tab) + for (i in 1:n){ + sommeligne=sum(res[i,], na.rm=TRUE) + if(sommeligne==0){ + res[i,]=rep(0,p) + }else{ + res[i,]=res[i,]*(100/sommeligne) + } + } + return(res) +} diff --git a/vmstools/R/vmsGridCreate.r b/vmstools/R/vmsGridCreate.r index 8f5c5ad..162911e 100644 --- a/vmstools/R/vmsGridCreate.r +++ b/vmstools/R/vmsGridCreate.r @@ -1,141 +1,214 @@ -#vmsGridCreate.r -#andy south 10/2/09 - -#flexible function to create fishing activity grids from VMS -#change to check upload Paris june 10 - -vmsGridCreate <- function( dF - , nameLon = "Longitude" - , nameLat = "Latitude" - , nameVarToSum = "" - , cellsizeX = 0.5 - , cellsizeY = 0.5 - , we="" - , ea="" - , so="" - , no="" - , gridValName="fishing" - , plotMap = TRUE - , plotTitle = "" - , numCats = 5 - , paletteCats = "heat.colors" - , addLegend = TRUE - , legendx='bottomleft' - , legendncol = 1 - , legendtitle = "fishing activity" - , plotPoints 
= TRUE - , legPoints = FALSE - , colPoints = 1 - , colLand = 'sienna' - , addICESgrid = FALSE - , addScale = TRUE - , outGridFile = "" #name for output gridAscii - , outPlot = "" #name for output png - , ... ) -{ - -require(sp) -require(maptools) - -lstargs <- list(...) - -#only create grids when num records >0 (otherwise generates error) -if ( nrow(dF) > 0 ) - { - - #if bounds are not specified then set them from the data - #rounds bounds to nearest whole cell unit - if ( we == "" ) {we = min( dF[[nameLon]], na.rm=TRUE ); we = we - we%%cellsizeX} - if ( ea == "" ) {ea = max( dF[[nameLon]], na.rm=TRUE ); ea = ea - ea%%cellsizeX + cellsizeX} - if ( so == "" ) {so = min( dF[[nameLat]], na.rm=TRUE ); so = so - so%%cellsizeY} - if ( no == "" ) {no = max( dF[[nameLat]], na.rm=TRUE ); no = no - no%%cellsizeY + cellsizeY} - - #if ( ea == "" ) ea = max( dF[[nameLon]], na.rm=T ) - #if ( so == "" ) so = min( dF[[nameLat]], na.rm=T ) - #if ( no == "" ) no = max( dF[[nameLat]], na.rm=T ) - - #this copes with negative we or so values - numXcells <- ceiling((ea-we)/cellsizeX) - numYcells <- ceiling((no-so)/cellsizeY) - #this copes with negative ea or no values - numXcells <- abs(numXcells) - numYcells <- abs(numYcells) - - #setting grid topology using package 'sp' - #gridTopology <- GridTopology(c(we,so), c(cellsize,cellsize), c(numXcells,numYcells)) - #this sets grid at lower left corner rather than centre - gridTopology <- GridTopology(c(we+(cellsizeX/2),so+(cellsizeY/2)), c(cellsizeX,cellsizeY), c(numXcells,numYcells)) - spatialGrid <- SpatialGrid(grid=gridTopology) - gridded(spatialGrid) = TRUE - - #put points into a 'SpatialPointsDataFrame' 'sp' object - coords <- cbind(x=dF[[nameLon]],y=dF[[nameLat]]) - sPDF <- SpatialPointsDataFrame(coords,data=dF) - - #overlay to find which grid cell that each VMS point is in - #can take a long time, but does work eventually - gridCellIndexPerVMSpoint <- over( as(sPDF,"SpatialPoints"),spatialGrid ) - sPDF$gridCellIndex <- 
gridCellIndexPerVMSpoint - - #if there's a column of time intervals then sum them - if (nameVarToSum != "") - { - #sum timeInterval for each gridCellIndex - perCell <- tapply(sPDF[[nameVarToSum]], gridCellIndexPerVMSpoint, sum) - #print('1') - } - else - { - #if no time interval, just count pings - #probably a better way than doing this! - sPDF$ones <- 1 #this just creates a vector of all 1s - perCell <- tapply(sPDF$ones, gridCellIndexPerVMSpoint, sum) - #print('2') - } - - #then need to get those aggregated data values back onto the original grid - #get it to a spatialGridDataFrame to be able to do that - - #create blank dataframe for data based on grid dimensions I know : numXcells * numYcells - dFdataEmpty <- data.frame( seqID=(1:(numXcells*numYcells))) - spatialGridDataFrame <- SpatialGridDataFrame( grid=gridTopology, data=dFdataEmpty ) - - spatialGridDataFrame[[gridValName]] <- NA #setting up blank column - #assigns summed values per cell back to the grid - #!bit tricky - spatialGridDataFrame[[gridValName]][as.integer(names(perCell))] <- c(perCell) - - if (plotMap) - { - - mapGrid(spatialGridDataFrame, sPDF - ,we=we, ea=ea, so=so, no=no - ,gridValName=gridValName, plotTitle = plotTitle - ,numCats = numCats, paletteCats =paletteCats, addLegend = addLegend - ,legendx=legendx, legendncol = legendncol - ,legendtitle = legendtitle, plotPoints = plotPoints, legPoints = legPoints, colPoints=colPoints - , colLand=colLand, addICESgrid = addICESgrid, addScale = addScale - ,outGridFile = outGridFile, outPlot = outPlot, breaks0=lstargs$breaks0 ) - } #end of plotMap - - #to output spatialGridDataFrame to a gridascii file - if (outGridFile != "") - { - writeAsciiGrid(spatialGridDataFrame, outGridFile, na.value = -99.99, attr=gridValName, dec='.') - } - - #option to save the plot - if (outPlot != "") - { - savePlot(outPlot,type='png') - } - - #returning invisibly - invisible(spatialGridDataFrame) - - } #end of if (nrow(dF)>0) - -} #end of vmsGridCreate - - - 
+#vmsGridCreate.r +#andy south 10/2/09 + +#flexible function to create fishing activity grids from VMS +#change to check upload Paris june 10 + + + +#' function to create grids from point data by counting points in cells or +#' summing an attribute +#' +#' Accepts an input of points data in a data frame with named columns for X and +#' Y. Creates a grid of defined cell size in X and Y directions (cells can be +#' unequal). Either counts points in cells or sums an attribute variable if one +#' is supplied. Optionally plots a map of the grid and outputs to a gridAscii +#' file and/or an image. +#' +#' +#' @param dF a dataFrame containing point data +#' @param nameLon name of the column in the dataFrame containing Longitude or x +#' values +#' @param nameLat name of the column in the dataFrame containing Latitude or y +#' values +#' @param nameVarToSum optional name of the column in the dataFrame containing +#' the attribute values to sum in the grid. If set to "" points are counted +#' @param cellsizeX length X (horizontal) of desired grid cells, in same units +#' as the coordinates +#' @param cellsizeY length Y (vertical) of desired grid cells, in same units as +#' the coordinates +#' @param we western bounds of the desired grid +#' @param ea eastern bounds of the desired grid +#' @param so southern bounds of the desired grid +#' @param no northern bounds of the desired grid +#' @param gridValName the name to give to the attribute column of the returned +#' \code{SpatialGridDataFrame}, set to 'fishing' by default +#' @param plotMap whether to plot a map of the resulting grid +#' @param plotTitle optional title to add to the plot +#' @param numCats how many categories to classify grid values into for map plot +#' (uses \code{pretty()} classification) +#' @param paletteCats color palette to use +#' @param addLegend whether to add a legend to the plot +#' @param legendx position of legend should be one of 'bottomright', 'bottom', +#' 'bottomleft', 'left', 'topleft',
'top', 'topright', 'right', 'center' +#' @param legendncol number of columns in the legend +#' @param legendtitle legend title +#' @param plotPoints whether to add the original points to the plot +#' @param legPoints Logical. Points in legend +#' @param colPoints color of points to plot +#' @param colLand color of land +#' @param addICESgrid Logical. Adding ICES grid on top +#' @param addScale Logical. Adding axes +#' @param outGridFile optional name for a gridAscii file to be created from the +#' grid +#' @param outPlot optional name for a png file to be created from the plot +#' @param \dots NOT used yet +#' @return a \code{SpatialGridDataFrame} object of the grid defined in package +#' \code{sp} +#' @author Andy South +#' @seealso \code{\link{mapGrid}} +#' @references EU VMS tools project +#' @examples +#' +#' #vmsGridCreate(dF, nameLon = "POS_LONGITUDE", nameLat = "POS_LATITUDE", +#' # cellsizeX = 0.5, cellsizeY = 0.5,legendx='bottomright',plotPoints=TRUE ) +#' #get the example data +#' data(tacsat) +#' +#' #subset the first 2000 points to avoid problems with NAs +#' dFVMS <- tacsat[1:2000,] +#' +#' #create vms grid minimum call with defaults +#' vmsGridCreate(dFVMS,nameLat='SI_LATI',nameLon='SI_LONG') +#' +#' #making the grid finer +#' vmsGridCreate(dFVMS,nameLat='SI_LATI',nameLon='SI_LONG', +#' cellsizeX=0.05,cellsizeY=0.05) +#' +#' +#' @export vmsGridCreate +vmsGridCreate <- function( dF + , nameLon = "Longitude" + , nameLat = "Latitude" + , nameVarToSum = "" + , cellsizeX = 0.5 + , cellsizeY = 0.5 + , we="" + , ea="" + , so="" + , no="" + , gridValName="fishing" + , plotMap = TRUE + , plotTitle = "" + , numCats = 5 + , paletteCats = "heat.colors" + , addLegend = TRUE + , legendx='bottomleft' + , legendncol = 1 + , legendtitle = "fishing activity" + , plotPoints = TRUE + , legPoints = FALSE + , colPoints = 1 + , colLand = 'sienna' + , addICESgrid = FALSE + , addScale = TRUE + , outGridFile = "" #name for output gridAscii + , outPlot = "" #name for output png
+ , ... ) +{ + +require(sp) +require(maptools) + +lstargs <- list(...) + +#only create grids when num records >0 (otherwise generates error) +if ( nrow(dF) > 0 ) + { + + #if bounds are not specified then set them from the data + #rounds bounds to nearest whole cell unit + if ( we == "" ) {we = min( dF[[nameLon]], na.rm=TRUE ); we = we - we%%cellsizeX} + if ( ea == "" ) {ea = max( dF[[nameLon]], na.rm=TRUE ); ea = ea - ea%%cellsizeX + cellsizeX} + if ( so == "" ) {so = min( dF[[nameLat]], na.rm=TRUE ); so = so - so%%cellsizeY} + if ( no == "" ) {no = max( dF[[nameLat]], na.rm=TRUE ); no = no - no%%cellsizeY + cellsizeY} + + #if ( ea == "" ) ea = max( dF[[nameLon]], na.rm=T ) + #if ( so == "" ) so = min( dF[[nameLat]], na.rm=T ) + #if ( no == "" ) no = max( dF[[nameLat]], na.rm=T ) + + #this copes with negative we or so values + numXcells <- ceiling((ea-we)/cellsizeX) + numYcells <- ceiling((no-so)/cellsizeY) + #this copes with negative ea or no values + numXcells <- abs(numXcells) + numYcells <- abs(numYcells) + + #setting grid topology using package 'sp' + #gridTopology <- GridTopology(c(we,so), c(cellsize,cellsize), c(numXcells,numYcells)) + #this sets grid at lower left corner rather than centre + gridTopology <- GridTopology(c(we+(cellsizeX/2),so+(cellsizeY/2)), c(cellsizeX,cellsizeY), c(numXcells,numYcells)) + spatialGrid <- SpatialGrid(grid=gridTopology) + gridded(spatialGrid) = TRUE + + #put points into a 'SpatialPointsDataFrame' 'sp' object + coords <- cbind(x=dF[[nameLon]],y=dF[[nameLat]]) + sPDF <- SpatialPointsDataFrame(coords,data=dF) + + #overlay to find which grid cell that each VMS point is in + #can take a long time, but does work eventually + gridCellIndexPerVMSpoint <- over( as(sPDF,"SpatialPoints"),spatialGrid ) + sPDF$gridCellIndex <- gridCellIndexPerVMSpoint + + #if there's a column of time intervals then sum them + if (nameVarToSum != "") + { + #sum timeInterval for each gridCellIndex + perCell <- tapply(sPDF[[nameVarToSum]], 
gridCellIndexPerVMSpoint, sum) + #print('1') + } + else + { + #if no time interval, just count pings + #probably a better way than doing this! + sPDF$ones <- 1 #this just creates a vector of all 1s + perCell <- tapply(sPDF$ones, gridCellIndexPerVMSpoint, sum) + #print('2') + } + + #then need to get those aggregated data values back onto the original grid + #get it to a spatialGridDataFrame to be able to do that + + #create blank dataframe for data based on grid dimensions I know : numXcells * numYcells + dFdataEmpty <- data.frame( seqID=(1:(numXcells*numYcells))) + spatialGridDataFrame <- SpatialGridDataFrame( grid=gridTopology, data=dFdataEmpty ) + + spatialGridDataFrame[[gridValName]] <- NA #setting up blank column + #assigns summed values per cell back to the grid + #!bit tricky + spatialGridDataFrame[[gridValName]][as.integer(names(perCell))] <- c(perCell) + + if (plotMap) + { + + mapGrid(spatialGridDataFrame, sPDF + ,we=we, ea=ea, so=so, no=no + ,gridValName=gridValName, plotTitle = plotTitle + ,numCats = numCats, paletteCats =paletteCats, addLegend = addLegend + ,legendx=legendx, legendncol = legendncol + ,legendtitle = legendtitle, plotPoints = plotPoints, legPoints = legPoints, colPoints=colPoints + , colLand=colLand, addICESgrid = addICESgrid, addScale = addScale + ,outGridFile = outGridFile, outPlot = outPlot, breaks0=lstargs$breaks0 ) + } #end of plotMap + + #to output spatialGridDataFrame to a gridascii file + if (outGridFile != "") + { + writeAsciiGrid(spatialGridDataFrame, outGridFile, na.value = -99.99, attr=gridValName, dec='.') + } + + #option to save the plot + if (outPlot != "") + { + savePlot(outPlot,type='png') + } + + #returning invisibly + invisible(spatialGridDataFrame) + + } #end of if (nrow(dF)>0) + +} #end of vmsGridCreate + + + diff --git a/vmstools/R/vmstools-package.R b/vmstools/R/vmstools-package.R new file mode 100644 index 0000000..458bb2a --- /dev/null +++ b/vmstools/R/vmstools-package.R @@ -0,0 +1,291 @@ + + +#' Vessel logbook 
dataset +#' +#' A dataset consisting of logbook (landings / values) registrations of +#' disguised origin for 2 consecutive years. +#' +#' This example dataset is for example purposes only. Without prior permission +#' of the authors it is not allowed to use this data other than for example +#' non-publishable purposes. +#' +#' @name eflalo +#' @docType data +#' @format A data frame with 4539 observations on the following 189 variables. +#' \describe{ \item{list("VE_REF")}{Vessel reference / ID} +#' \item{list("VE_FLT")}{Fleet reference (DCF regulation)} +#' \item{list("VE_COU")}{Flag nation of vessel} \item{list("VE_LEN")}{Vessel +#' length} \item{list("VE_KW")}{Vessel power (in kW)} +#' \item{list("VE_TON")}{Vessel Tonnage (GT)} \item{list("FT_REF")}{Fishing +#' trip reference number} \item{list("FT_DCOU")}{Departure country} +#' \item{list("FT_DHAR")}{Departure harbour} \item{list("FT_DDAT")}{Departure +#' date} \item{list("FT_DTIME")}{Departure time} \item{list("FT_LCOU")}{Landing +#' country} \item{list("FT_LHAR")}{Landing harbour} +#' \item{list("FT_LDAT")}{Landing date} \item{list("FT_LTIME")}{Landing time} +#' \item{list("LE_ID")}{Log event ID} \item{list("LE_CDAT")}{Catch date} +#' \item{list("LE_STIME")}{Log event start time} \item{list("LE_ETIME")}{Log +#' event end time} \item{list("LE_SLAT")}{Log event start position latitude} +#' \item{list("LE_SLON")}{Log event start position longitude} +#' \item{list("LE_ELAT")}{Log event end position latitude} +#' \item{list("LE_ELON")}{Log event end position longitude} +#' \item{list("LE_GEAR")}{Gear} \item{list("LE_MSZ")}{Mesh size} +#' \item{list("LE_RECT")}{ICES rectangle} \item{list("LE_DIV")}{ICES division} +#' \item{list("LE_MET_level6")}{Fishing activity} +#' \item{list("LE_KG_SPEC")}{Landing weight estimate of species (FAO species +#' codes)} \item{list("LE_EURO_SPEC")}{Landing value of species (FAO species +#' codes)} } +#' @references EU Lot 2 project, Niels T. 
Hintzen (niels.hintzen@wur.nl) +#' @source NA +#' @keywords datasets +#' @examples +#' +#' data(eflalo) +#' eflalo <- formatEflalo(eflalo) +#' str(eflalo) +#' +NULL + + + + + +#' Dataframe containing > 3500 harbours primarily Europe +#' +#' A list of harbour positions primarily located in Europe, including -name, +#' -latitude, -longitude and a certain -range (in km) the harbour spans +#' (radius). +#' +#' +#' @name euharbours +#' @aliases euharbours harbours +#' @docType data +#' @references EU Lot 2 project, Niels T. Hintzen (niels.hintzen@wur.nl) +#' @keywords datasets +#' @examples +#' +#' data(tacsat) +#' data(euharbours); euharbours <- harbours +#' +#' #modify the range of the harbours to e.g. 2km +#' euharbours$range <- 2 +#' +#' pointInHarbour(tacsat$SI_LONG,tacsat$SI_LATI,euharbours) +#' +NULL + + + + + +#' Polygon dataset of Europe +#' +#' Polygon dataset with the coastlines of Europe. +#' +#' +#' @name europa +#' @docType data +#' @format A data frame with 83878 observations on the following 5 variables. +#' \describe{ \item{list("PID")}{a numeric vector} \item{list("SID")}{a numeric +#' vector} \item{list("POS")}{a numeric vector} \item{list("X")}{Longitude +#' values} \item{list("Y")}{Latitude values} } +#' @source NOAA coastline datafile +#' @keywords datasets +#' @examples +#' +#' data(europa) +#' require(PBSmapping) +#' +#' eurPols <- lonLat2SpatialPolygons(lst=lapply(as.list(sort(unique(europa$SID))), +#' function(x){data.frame( +#' SI_LONG=subset(europa,SID==x)$X, +#' SI_LATI=subset(europa,SID==x)$Y)})) +#' +#' plot(eurPols,col="green",xlim=c(-4,10),ylim=c(48,62)) +#' +NULL + + + + + +#' SpatialPolygons dataset of ICESareas +#' +#' A pre-defined SpatialPolygons dataset with the ICES areas. 
+#' +#' +#' @name ICESareas +#' @docType data +#' @source www.ices.dk +#' @keywords datasets +#' @examples +#' +#' require(sp) +#' data(ICESareas) +#' plot(ICESareas) +#' +NULL + + + + + +#' Merge TACSAT formatted data to EFLALO2 formatted data +#' +#' Method to assign the EFLALO2 tripnumber to the accompanying TACSAT records, +#' which were registered between the departure date and arrival date of a trip. +#' +#' Method only assigns a tripnumber to the tacsat data if there are vms pings +#' that occur between the departure date and arrival date of a specific vessel. +#' If no related trip can be found, tripnumber assigned to tacsat equals NA. +#' +#' @name mergeEflalo2Tacsat +#' @docType data +#' @param eflalo2 Trip and landings data in the Eflalo2 format +#' @param tacsat Vms data in the Tacsat format +#' @author Niels T. Hintzen, Francois Bastardie +#' @seealso \code{\link{findEndTacsat}}, \code{\link{sortTacsat}}, +#' \code{\link{filterTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' #Load the eflalo2 and tacsat data +#' data(eflalo) +#' data(tacsat) +#' +#' #-Remove duplicated records from tacsat +#' myf <- paste(tacsat$VE_REF,tacsat$SI_LATI,tacsat$SI_LONG, +#' tacsat$SI_DATE,tacsat$SI_TIME); +#' tacsat <- tacsat[!duplicated(myf),]; +#' +#' #-Merge the tacsat and eflalo2 data +#' #computation time approx 30 seconds +#' result <- mergeEflalo2Tacsat(eflalo,tacsat) +#' +NULL + + + + + +#' Useful functions for the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. 
+#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +NULL + + + + + +#' Vessel Monitoring by Satellite system dataset +#' +#' A dataset consisting of VMS registrations of disguised origin for 2 +#' consecutive years. +#' +#' This example dataset is for example purposes only. Without prior permission +#' of the authors it is not allowed to use this data other than for example +#' non-publishable purposes. +#' +#' @name tacsat +#' @docType data +#' @format A data frame with 97015 observations on the following 8 variables. +#' \describe{ \item{list("VE_COU")}{Flag nation of vessel} +#' \item{list("VE_REF")}{Vessel reference / ID} +#' \item{list("SI_LATI")}{Latitudinal position (in decimal degrees)} +#' \item{list("SI_LONG")}{Longitudinal position (in decimal degrees)} +#' \item{list("SI_DATE")}{Date} \item{list("SI_TIME")}{Time (24 hours clock)} +#' \item{list("SI_SP")}{Speed (in knots)} \item{list("SI_HE")}{Heading +#' (in degrees)} } +#' @references EU Lot 2 project, Niels T. 
Hintzen (niels.hintzen@wur.nl) +#' @source NA +#' @keywords datasets +#' @examples +#' +#' data(tacsat) +#' tacsat <- formatTacsat(tacsat) +#' str(tacsat) +#' +NULL + + + + + +#' Small high frequency test VMS dataset CANNOT BE DISTRIBUTED WITHOUT PRIOR +#' PERMISSION -> SEE Author +#' +#' A short VMS dataset with high frequency interval rate (every 6 minutes) used +#' to test spatial and VMS functions. Dataframe contains vessel ID, decimal +#' longitude and decimal latitude position, speed and heading at ping and date +#' of ping. +#' +#' +#' @name VMShf +#' @docType data +#' @format A data frame with 702 observations on the following 6 variables. +#' \describe{ \item{list("ship")}{ID of ship} \item{list("declat")}{Decimal +#' notation of latitude position} \item{list("declon")}{Decimal notation of +#' longitude position} \item{list("speed")}{Speed at ping} +#' \item{list("heading")}{Heading at ping} \item{list("date")}{Date and time of +#' ping} } +#' @references niels.hintzen@wur.nl +#' @source niels.hintzen@wur.nl +#' @examples +#' +#' data(VMShf) +#' str(VMShf) +#' +#' +NULL + + + + + +#' development of R scripts to setup the workflow of vms and logbook data +#' +#' -Design of routines to import VMS and Logbook data\cr -Design of routines to +#' cluster metiers\cr -Design of routines to link VMS and Logbook data\cr +#' -Design of routines to define vessel activity\cr -Design of routines to +#' interpolate VMS data\cr -Design of routines to compute pressure +#' indicators\cr +#' +#' \tabular{ll}{ Package: \tab VMStools\cr Type: \tab Package\cr Version: \tab +#' 0.58\cr Date: \tab 2011-12-30\cr License: \tab \cr LazyLoad: \tab yes\cr } +#' +#' @name VMStools-package +#' @aliases VMStools-package VMStools +#' @docType package +#' @author Niels T. 
Hintzen, Doug Beare, Francois Bastardie, Nicolas Deporte, +#' Andy South, Neil Campbell, Fabrizio Manco, Clara Ulrich Rescan, Stephanie +#' Mahevas, Sebastien Demaneche, Josefine Egekvist, Hans Gerritsen, Matthew +#' Parker-Humphreys +#' +#' Maintainer: Niels T. Hintzen , Francois Bastardie +#' +#' @references EU Lot 2 project +NULL + + + diff --git a/vmstools/R/withinVar.r b/vmstools/R/withinVar.r index 3721c78..613b440 100644 --- a/vmstools/R/withinVar.r +++ b/vmstools/R/withinVar.r @@ -1,10 +1,38 @@ -################################################################################ -# Calculate the cluster's within-variance (square distance between each row of # -# the cluster and the cluster's center of gravity) # -# we calculate the distance row by row # -################################################################################ - -withinVar <- function(oneRowOfCluster,centerOfGravityClusti){ - comb <- rbind(centerOfGravityClusti, oneRowOfCluster) - sqrComb <- dist(comb)^2 -return(sqrComb)} \ No newline at end of file +################################################################################ +# Calculate the cluster's within-variance (square distance between each row of # +# the cluster and the cluster's center of gravity) # +# we calculate the distance row by row # +################################################################################ + + + +#' Useful functions for the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. 
+#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export withinVar +withinVar <- function(oneRowOfCluster,centerOfGravityClusti){ + comb <- rbind(centerOfGravityClusti, oneRowOfCluster) + sqrComb <- dist(comb)^2 +return(sqrComb)}