-
Notifications
You must be signed in to change notification settings - Fork 12
Practicals6
Let's assume that a large area of the North Sea will be closed to fishing because of alternative usage such as nature conservation (e.g. Natura 2000 areas), energy production (windmill parks or gas/oil extraction), maritime transport, military activity, etc.
library(vmstools)
library(sf)
# let's define an area of interest smack in our data
data(tacsat)
# Use the median ping position so the closure sits in the middle of the data
meanLat <- median(tacsat$SI_LATI)
meanLon <- median(tacsat$SI_LONG)
# make a polygon from it: 1 degree wide, 0.5 degree tall, first vertex repeated to close the ring
closure <- lonLat2SFPolygons(lst=list(data.frame(SI_LONG=meanLon+c(-0.5,-0.5,0.5,0.5,-0.5),
SI_LATI=meanLat+c(-0.25,0.25,0.25,-0.25,-0.25))))
# Coerce to an sfc geometry column and declare WGS84 (EPSG:4326) coordinates
closure <- st_as_sfc(list(closure))
st_crs(closure) <- 4326
# Plot the area on top of the Europe coastline shipped with vmstools
X11()
data(europa)
plot(europa,col="grey",xlim=c(-4,10),ylim=c(48,62)); axis(1); axis(2)
plot(closure,col="red",add=T)
Now we have an area that should be closed to fisheries. During the negotiation process, the value of the area for the fishing fleet will be a key point. Is the area important for fisheries?
#Start fresh: remake tacsatEflalo (or use your previously saved one)
data(eflalo)
tacsat$LE_RECT <- ICESrectangle(tacsat)
# Link VMS pings to logbook trips, then copy the gear code per trip
tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat)
tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)]
# Row index so the filter result can be mapped back onto tacsatp
tacsatp$IDX <- seq_len(nrow(tacsatp))
# Quick and dirty way to define activity: speeds of 1-6 knots flag fishing
tacsatFilter <- filterTacsat(tacsatp,st=c(1,6),hd=NULL,remDup=TRUE)
tacsatp$SI_STATE <- 0
tacsatp$SI_STATE[tacsatFilter$IDX] <- 1
# Keep fishing pings only
tacsatp <- subset(tacsatp,SI_STATE == 1)
# Distribute the logbook landings and value over the fishing pings
tacsatEflalo <- splitAmongPings(tacsat=tacsatp,eflalo=eflalo,
variable="all",level=c("day","ICESrectangle","trip"),conserve=TRUE)
# calculate the total value per ping (sum over all LE_EURO_* species columns) and plot it
tacsatEflalo$LE_TOT_VAL <- rowSums(tacsatEflalo[grep("LE_EURO_",names(tacsatEflalo))],na.rm=TRUE)
X11()
p <- plotTools(tacsatEflalo,level="gridcell",xlim=c(-5,10),ylim=c(48,62),zlim=NULL,log=FALSE,gridcell=c(0.1,0.1),color=NULL,control.tacsat=list(clm="LE_TOT_VAL"))
# Overlay the closure outline on the gridded value map
p[[1]] + geom_sf(data=closure,aes(fill=1),colour="red",fill=NA)
So there is some fishing there but it doesn't seem to be the main fishing ground of the fleet. Now that we have a feel for it let's quantify the value of the area.
# identify the pings that fall in the area
# first you want to make sure that you are using the right projection for your data and your shapefile
pings <- st_as_sf(tacsatEflalo,coords=c("SI_LONG","SI_LATI"))
st_crs(pings) <- 4326
# then see which points are in our closure
# the overlay gives, per ping, the index of the polygon it falls in (NA when outside)
closePings <- st_over(pings,closure)
table(closePings,useNA="ifany")
So we have 941 fishing pings falling in the area, or about 3% of the total number of fishing pings! So how much does this represent in revenue?
# calculate the revenue in the area
# Flag each ping: factor "TRUE" when it fell inside the closure (non-NA overlay result)
tacsatEflalo$closed <- as.factor(!is.na(closePings))
table(tacsatEflalo$closed)
# Total value inside vs outside the closed area
aggregate(tacsatEflalo["LE_TOT_VAL"],by=list(closed=tacsatEflalo$closed),FUN=sum,na.rm=T)
# plot the revenue
library(ggplot2)
# weight= makes the bars sum LE_TOT_VAL instead of counting pings
r <- ggplot(tacsatEflalo,aes(x=SI_YEAR,weight=LE_TOT_VAL))
r+geom_bar(width=.8,aes(fill=closed)) + ylab("EURO")
The next step is to identify who will be impacted by the closure
# we can easily look at it by gear (dplyr must be loaded for the pipe below)
tacsatEflalo %>%
filter(closed=="TRUE") %>%
group_by(LE_GEAR)%>%
summarize(value=sum(LE_TOT_VAL,na.rm=T))
# and also plot it by gear, closed-area pings only
r <- ggplot(subset(tacsatEflalo,closed=="TRUE"),aes(x=LE_GEAR,weight=LE_TOT_VAL))
r+geom_bar(width=.8,aes(fill=LE_GEAR)) + ylab("EURO")
- Calculate the revenue in the closed area for the vessels > 24 m
- Is the area more fished during some months?
- Let's assume that you are interested in the total landings in KG, not revenue, can you tell how much is fished in the area?
Show answer
- We will filter for vessels larger than 24m (that info we need to get first from the eflalo file) and for the closed area. Then all we need to do is to sum the total value column.
# library() errors immediately when dplyr is missing; require() only warns
library(dplyr)
# Pull the vessel length from the logbooks onto each merged ping (by trip reference)
tacsatEflalo$VE_LEN <- eflalo$VE_LEN[match(tacsatEflalo$FT_REF,eflalo$FT_REF)]
# 'closed' is a factor with levels "FALSE"/"TRUE"; compare against the label explicitly
tacsatEflalo %>% filter(closed=="TRUE", VE_LEN > 24) %>% summarize(revenue=sum(LE_TOT_VAL,na.rm=TRUE))
We simply add a grouping by month to the code above to get the monthly aspect into the analysis.
# library() errors immediately when lubridate is missing; require() only warns
library(lubridate)
# Extract the calendar month from the ping timestamp
tacsatEflalo$SI_MONTH <- month(tacsatEflalo$SI_DATIM)
# Monthly revenue inside the closed area
tacsatEflalo %>% filter(closed=="TRUE") %>% group_by(SI_MONTH) %>% summarize(revenue=sum(LE_TOT_VAL,na.rm=TRUE))
- For value we have created the 'LE_TOT_VAL' column, we can do the same for the KGs.
# Total landed weight per ping: sum over all LE_KG_* species columns
tacsatEflalo$LE_TOT_KG <- rowSums(tacsatEflalo[grep("LE_KG_",names(tacsatEflalo))],na.rm=TRUE)
# Total kilograms landed inside the closed area
tacsatEflalo %>% filter(closed=="TRUE") %>% summarize(kgs=sum(LE_TOT_KG,na.rm=TRUE))
We now have a feel for the importance of the area for the fleet in terms of total revenue, what about individual fishers? The area is not very important all in all but it may be for some fishers
# because we look at individual fishers, we remove the "conserved" landings
# (conserve=F drops value that could not be attributed to specific pings)
tacsatEflalo <- splitAmongPings(tacsat=tacsatp,eflalo=eflalo,
variable="all",level=c("day","ICESrectangle","trip"),conserve=F)
# Redo the point-in-polygon overlay on the rebuilt dataset
pings <- st_as_sf(tacsatEflalo,coords=c("SI_LONG","SI_LATI"))
st_crs(pings) <- 4326
closePings <- st_over(pings,closure)
tacsatEflalo$closed <- as.factor(!is.na(closePings))
tacsatEflalo$SI_YEAR <- as.factor(year(tacsatEflalo$SI_DATIM))
# we recalculate the total value per ping
tacsatEflalo$LE_TOT_VAL <- rowSums(tacsatEflalo[grep("LE_EURO_",names(tacsatEflalo))],na.rm=T)
# for each point we calculate the proportion of the vessel revenue:
# ave() sums LE_TOT_VAL per vessel-year, so valPerc is the ping's share (%) of that vessel's annual revenue
tacsatEflalo <- within(tacsatEflalo,{valPerc <- LE_TOT_VAL/ave(LE_TOT_VAL,VE_REF,SI_YEAR,FUN=sum)*100})
# We can then aggregate the value per vessel/year/closed-flag and only keep the part from the closed areas
IndAnalysis <- aggregate(tacsatEflalo["valPerc"],by=list(closed=tacsatEflalo$closed,SI_YEAR=tacsatEflalo$SI_YEAR,VE_REF=tacsatEflalo$VE_REF),FUN=sum,na.rm=T)
IndAnalysis <- subset(IndAnalysis,closed=="TRUE")
# Bar plot of the number of vessels per impact class (% of revenue inside the closure), split by year.
#   data   - data.frame holding the column named by Var plus SI_YEAR
#   Var    - name (string) of the percentage column to bin
#   breaks - cut points; the leading -.1 makes exact zeros fall in the first ("0") class
plotISLA <- function(data,Var,breaks=c(-.1,0,10,20,30,40,50,75,100)){
# Labels "0", ">0-10", ..., ">50-75", ">75" matching the cut() intervals one-to-one
labs <- c("0",paste(">",breaks[2:(length(breaks)-2)],"-",breaks[3:(length(breaks)-1)],sep=""),paste(">",breaks[length(breaks)-1],sep=""))
data$stressVar <- cut(data[,Var],breaks=breaks,labels=labs)
m <- ggplot(data, aes(x=stressVar,fill=SI_YEAR),width=0.2)
print(m + geom_bar(colour='black',position=position_dodge(width=0.9))+theme_bw()+ xlab('% affected by closure')+ylab('number of vessels')+theme(legend.position='bottom',axis.text.x = element_text(angle = 45,vjust=.5)) )
}
plotISLA(IndAnalysis,"valPerc")
Looking at the data individually you can see that some vessels will be highly affected if the area is closed. As a rule of thumb 10% of the revenue is a large impact on a fisherman. Let's look at how many fishermen are strongly impacted.
# Vessels with at least 10% of their annual revenue inside the closure
highImpact <- subset(IndAnalysis,valPerc>=10)
table(highImpact$SI_YEAR)
# 33 vessels had at least 10% of their revenue in the area in 1800 and 20 in 1801
summarizeTacsat(tacsatEflalo)
# 282 vessels in merged tacsatEflalo
- How many vessels fish more than 50% of their revenue in the area in 1800?
- What gear do they use?
- Which harbour do they land in?
Show answer
- We can take the object created in the practical above that had the percentage of value in the closed area by vessel, the 'IndAnalysis' object. Then we filter the conditions we require and simply count the number of vessels.
# Count distinct vessels with >50% of their 1800 revenue inside the closure
IndAnalysis %>% filter(SI_YEAR == 1800, closed==T, valPerc > 50) %>% summarize(numVessels = n_distinct(VE_REF))
- We need to get the vessel names out from the previous exercise so we can use that information for a new subsetting.
# Keep the heavily-impacted vessels (1800, >50% of revenue inside the closure)
subInd <- IndAnalysis %>% filter(SI_YEAR == 1800, closed==T, valPerc > 50)
VE_REFs <- unique(subInd$VE_REF)
# look per vessel: the gear(s) each affected vessel used
tacsatEflalo %>% filter(SI_YEAR == 1800, VE_REF %in% VE_REFs) %>% reframe(unique(LE_GEAR),.by=VE_REF)
# or value per gear type for all those vessels
tacsatEflalo %>% filter(SI_YEAR == 1800, VE_REF %in% VE_REFs) %>% group_by(LE_GEAR) %>% summarize(value=sum(LE_TOT_VAL))
- We need to add the landing harbour information from the logbooks to the tacsatEflalo file first, then we can check in which harbours they are landing
# Copy the landing-harbour code from the logbooks onto each ping's trip
tacsatEflalo$FT_LHAR <- eflalo$FT_LHAR[match(tacsatEflalo$FT_REF,eflalo$FT_REF)]
# look per vessel: the harbour(s) each affected vessel lands in
tacsatEflalo %>% filter(SI_YEAR == 1800, VE_REF %in% VE_REFs) %>% reframe(unique(FT_LHAR),.by=VE_REF)
# or value per harbour for all those vessels
tacsatEflalo %>% filter(SI_YEAR == 1800, VE_REF %in% VE_REFs) %>% group_by(FT_LHAR) %>% summarize(value=sum(LE_TOT_VAL))
So that would be harbours HD which is Den Helder in the Netherlands, and IJM which would be IJmuiden and SL which is Stellendam. Feel free to google these places.
The way fisheries aggregate determines how much they are affected by Natura 2000 areas and how much area they leave aside and remains untrawled. Nick Ellis and co-authors in 2014 published a paper on how the aggregation behaviour of fisheries could be explained. They suggested three types of processes: a hit or miss, a random and an aggregated distribution. In this practical we will simulate all three of these, show them visually and try to estimate the amount of aggregation in our example dataset. We first start with visualizing the 3 different types of fisheries spatial use.
#Create a study-area (or simply a matrix where each gridcell represents some part of your study area)
SA <- matrix(0,nrow=100,ncol=100)
image(SA,xaxt="n",yaxt="n",xlab="Gridcell X",ylab="Gridcell Y")
axis(1,at=seq(0,1,length.out=100),labels=1:100)
axis(2,at=seq(0,1,length.out=100),labels=1:100)
#Option 1: fisheries is a matter of hitting or missing a specific spot
# In this case we assume that a fisherman can hit an area at most once, and that each area has a 50% chance of getting trawled
par(mfrow=c(1,3))
# One Bernoulli draw per cell of the 100x100 matrix
bdat <- rbinom(10000,size=1,prob=0.5)
SA[] <- bdat
image(SA,xaxt="n",yaxt="n",xlab="Gridcell X",ylab="Gridcell Y",main="Binomial",col=rev(heat.colors(50)),zlim=c(0,10))
axis(1,at=seq(0,1,length.out=100),labels=1:100)
axis(2,at=seq(0,1,length.out=100),labels=1:100)
box()
#Option 2: a random process in selecting an area to fish describes the fishery
# Poisson counts: a cell can now be trawled several times, with mean 0.5 visits
pdat <- rpois(10000,lambda=0.5)
SA[] <- pdat
image(SA,xaxt="n",yaxt="n",xlab="Gridcell X",ylab="Gridcell Y",main="Poisson",col=rev(heat.colors(50)),zlim=c(0,10))
axis(1,at=seq(0,1,length.out=100),labels=1:100)
axis(2,at=seq(0,1,length.out=100),labels=1:100)
box()
#Option 3: a fishery really selects specific hotspots to go out and fish
# Negative binomial with small size (0.1): strong overdispersion, i.e. aggregation
ndat <- rnbinom(10000,size=0.1,mu=0.5)
SA[] <- ndat
image(SA,xaxt="n",yaxt="n",xlab="Gridcell X",ylab="Gridcell Y",main="Negative Binomial",col=rev(heat.colors(50)),zlim=c(0,10))
axis(1,at=seq(0,1,length.out=100),labels=1:100)
axis(2,at=seq(0,1,length.out=100),labels=1:100)
box()
- How much 'effort' is used in each of the 3 fishery types
- What is the area unfished in each of the 3 fishery types
Show answer
- In all cases we work with a 50% probability of a grid cell being trawled. We take 10,000 draws, which then equals out to 5,000 units of effort. From the data we can also see that it's close to that value.
# Total effort = sum of the draws in each simulated fishery (each should be near 5000)
sum(bdat); sum(pdat); sum(ndat)
- We need to count the number of zeros in our SA matrix (or in the data vectors themselves) and divide that by the total number of records.
# Share of unfished cells = proportion of zero draws in each simulation.
# mean(x==0) is the idiomatic equivalent of length(which(x==0))/length(x)
# (identical here because the r* draws contain no NA values).
mean(bdat==0); mean(pdat==0); mean(ndat==0)
The one thing we notice in this theoretical example is that there are no clusters of fishing, as we usually see. Hotspots are simply often clustered together. Generating such a pattern is not too complicated, but it involves getting some sort of spatial-correlation structure up and running. These authors provide supplementary material where they show how to do that: Spatial correlation
Let's have a look at our example data and see what kind of type we have there. There are a couple of steps we need to get through:
- Take a nice sample to play with
- Create a grid
- Put our VMS pings onto that grid
- Count the number of times grid cells have been trawled
- Fit each of these three distributions to that count data and see what type of aggregation we have
data(tacsat)
# Sort pings chronologically per vessel before any interval work
tacsat <- sortTacsat(tacsat)
#Step 1
#- Take only 1 year of data (the example data uses the dummy year 1800)
tacsat <- subset(tacsat,format(tacsat$SI_DATIM,format="%Y")==1800)
#- Lets see in what ICES rectangle these positions are located
tacsat$LE_RECT <- ICESrectangle(tacsat)
print(sort(table(tacsat$LE_RECT)))
# Keep only the busiest rectangle
tacsat <- subset(tacsat,LE_RECT =="33F3")
#Step 2
#- Create a grid at 1/100 degree resolution
resx <- 1/100
resy <- 1/100
# An ICES rectangle is 1 degree wide and 0.5 degree tall; expand from its midpoint
ranges <- ICESrectangle2LonLat("33F3",midpoint=T)
xrange <- an(c(ranges[2]-0.5,ranges[2]+0.5))
yrange <- an(c(ranges[1]-0.25,ranges[1]+0.25))
grd <- createGrid(xrange,yrange,resx,resy)
#Step 3
#- Project our VMS pings onto the grid
sp <- st_as_sf(tacsat,coords=c("SI_LONG","SI_LATI")) #Turn points into spatial points
tacsat$grID <- st_over(sp,grd) # project points onto grid to see in what gridcell they belong
#Step 4
#- Make a count of the number of time specific gridcells are trawled
table(tacsat$grID)
#- Make a count of the number of times gridcells are trawled 0 - x times
table(table(tacsat$grID))
# Observed frequency table: rows = times trawled, value = number of grid cells
freq <- matrix(table(table(tacsat$grID)),dimnames=list(names(table(table(tacsat$grID))),"Observed"))
# Prepend the zero class: cells of the 100x100 grid never hit by any ping
freq <- rbind(matrix(length(which(!1:10000 %in% unique(tacsat$grID))),dimnames=list(0,"Observed")),freq)
#Step 5
#- What would we have predicted given scenario 1, 2 or 3
# sum(freq[2:7])/sum(freq) estimates the mean number of trawl events per grid cell
binom <- matrix(round(dbinom(0:6,size=1,prob=0.5)*sum(freq)),dimnames=list(0:6,"Binomial")) #Scenario 1
random <- matrix(round(dpois(0:6,lambda=sum(freq[2:7])/sum(freq))*sum(freq)),dimnames=list(0:6,"Random")) #Scenario 2
aggre <- matrix(round(dnbinom(0:6,size=(sum(freq[2:7])/sum(freq))/0.2,mu=sum(freq[2:7])/sum(freq))*sum(freq)),dimnames=list(0:6,"Aggregated")) #Scenario 3
# Side-by-side bars: observed vs the three theoretical distributions
comb <- cbind(freq,binom,random,aggre)
barplot(t(comb),beside=T,col=1:4)
legend("topright",fill=1:4,legend=colnames(comb))
- What is the value 0.2 doing in the 'aggre <- matr...' line? What does it represent?
Show answer
- That is the aggregation parameter that we're using to simulate this data
Finally, let's find the real aggregation value
# Fit the aggregation model to the observed trawl-frequency table
estimateAggregation(table(table(tacsat$grID)),trustZero=FALSE)[2] #This is your aggregation parameter
# Predict the full distribution once and reuse it. The original recomputed the
# prediction inside barplot() and misspelled the argument as 'trustZeros'
# (inconsistent with 'trustZero' above); reusing 'res' fixes both.
res <- predictAggDistri(table(table(tacsat$grID)),trustZero=FALSE)
barplot(t(res),beside=TRUE,col=1:2)
legend("topright",fill=1:2,legend=c("Observed","Predicted"))
For a start, let's take a simple indicator to calculate ourselves. Let's define: the total surface area trawled by year. What we need to do for this exercise is:
- clean the tacsat and eflalo data
- merge them together
- perform activity analyses and take fishing activity only
- select bottom trawling gear only
- define the width of the gear
- define the grid
We have the choice to interpolate the data or not, but to keep things simple, we will not do that here. Obviously, the function to add a width to a gear, seen before, could be useful. However, we take less of a true spatial approach here (for most spatial analyses, your data structure is fine as vectors rather than as a grid!), whereby the 'addWidth' function would simply take too long to run.
data(tacsat);data(eflalo)
#Now clean the tacsat and eflalo dataset...
# Link pings to trips and copy the gear code over
tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat)
tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)]
#Again, the quick and dirty fishing activity analyses (2-6 knots = fishing)
tacsatp <- filterTacsat(tacsatp,st=c(2,6),hd=NULL,remDup=T)
#Select gears
table(tacsatp$LE_GEAR)
# Bottom-trawl gears only: otter trawl, pair trawl, beam trawl
gears <- c("OTB","PTB","TBB")
tacsatp <- subset(tacsatp,LE_GEAR%in% gears)
# Gear width per gear type (PTB set to twice the OTB width)
gearWidth <- data.frame(LE_GEAR=gears,LE_WIDTH=c(87,2*87,24))
#Define the grid & subset the area
spatBound <- list(x=c(-5,10),y=c(48,62))
idxlon <- which(tacsatp$SI_LONG >= spatBound$x[1] & tacsatp$SI_LONG <= spatBound$x[2])
idxlat <- which(tacsatp$SI_LATI >= spatBound$y[1] & tacsatp$SI_LATI <= spatBound$y[2])
# Keep only pings that satisfy both the lon and lat bounds
tacsatp <- tacsatp[idxlon[which(idxlon %in% idxlat)],]
grd <- createGrid(xrange=c(unlist(spatBound$x)),
yrange=c(unlist(spatBound$y)),
resx=0.1,resy=0.05,type="GridDF") #Let's take a tenth of an ICES rectangle for a start
So far so good, you've all seen these preparation steps a number of times now, but it's good to learn them by heart as you will need to go over these time and time again (look at Practicals2).
The next step is to start calculating the area trawled for each VMS ping. It might be handy here to store the results by year in some sort of data-frame again, so you can access it later, and also derive other results from it.
Area trawled = duration of trawling x width of gear x speed of a vessel. Let's work this equation out for each of the VMS pings in the dataset.
# Pings must be in chronological order per vessel before computing intervals
tacsatp <- sortTacsat(tacsatp)
#Calculate duration of trawling per ping
tacsatp <- intervalTacsat(tacsatp,level="trip",fill.na=TRUE)
tacsatp$SI_INTV <- tacsatp$INTV
# Merge the gear widths in (the join key must be character on both sides)
gearWidth$LE_GEAR <- ac(gearWidth$LE_GEAR)
tacsatp <- merge(tacsatp,gearWidth,by="LE_GEAR")
# Swept area = duration x gear width x speed
# assumes SI_INTV is in minutes, LE_WIDTH in metres and SI_SP in knots
# (so /60 -> hours, /1000 -> km, *1.852 -> km/h, giving km2) -- TODO confirm units
tacsatp$TR_AREA <- (tacsatp$SI_INTV / 60) * (tacsatp$LE_WIDTH / 1000) * (tacsatp$SI_SP *1.852)
For the full tacsat dataset we have now calculated the area trawled per ping, however, not yet per year. We could easily split up the tacsat dataset into the two years (1800 and 1801) and aggregate over the trawled area column. But, as indicated before, if we want a bit more flexibility, it might be good to store the data first and perform calculations later-on.
#Now align the VMS pings to the grid
#We have to turn the coordinates of the tacsat dataset into spatial points first
# Pre-allocate one swept-area column per year of the example data
grd$TR_AREA_1800 <- 0
grd$TR_AREA_1801 <- 0
for(iYr in c(1800:1801)){
subTacsat <- subset(tacsatp,year(SI_DATIM)==iYr)
sp <- st_as_sf(subTacsat,coords=c("SI_LONG","SI_LATI"))
#The spatial grid dataframe is a long list (nrow(spgDF@data)) and the idx tells me which row number (associated with a grid cell) the VMS ping is located in.
idx <- st_over(sp,grd)
# Sum the swept area of all pings falling in the same grid cell
agg <- aggregate(subTacsat$TR_AREA, by=list(idx),FUN=sum,na.rm=T)
colnames(agg) <- c("grID","TR_AREA")
#- add the data to the grid object (grep picks the TR_AREA_<year> column)
grd[agg$grID,grep(iYr,colnames(grd))] <- agg$TR_AREA
}
#Have a look at some data (where Y1800 is not zero)
head(grd[which(grd$TR_AREA_1800>0),])
#And calculate the indicator value: total area trawled per year
colSums(st_drop_geometry(grd[,c("TR_AREA_1800","TR_AREA_1801")]),na.rm=T)
However, we just set this system up to be a bit more flexible and use the data for other questions too. Can we now easily move from total area trawled to percentage of the area untrawled? Or could we even see which area has been trawled in 1800 but not in 1801?
#Add surface of each gridcell to the dataframe
st_crs(grd) <- 4326
# st_area() returns square metres; dividing by 1000*1000 gives km2
grd$surface <- st_area(grd)/(1000*1000)
#Percentage trawled: share of the total surface belonging to cells touched in 1800
idxun <- which(grd$TR_AREA_1800 > 0)
sum(grd$surface[idxun]) / sum(grd$surface) * 100
#Area trawled in 1800 but not in 1801, as a fraction of the total surface
idx <- which(grd$TR_AREA_1800 > 0 & grd$TR_AREA_1801 == 0)
sum(grd$surface[idx]) / sum(grd$surface)
All in all, a lot to play with in a rather flexible way.
- Calculate the total area trawled by the TBB gears only (in 1800)
- In how many grid cells was the trawling intensity > 0.5? (i.e. where has the area trawled exceeded half the surface of the area)
Show answer
1. We need to assign gears to our VMS data first, then make a selection for TBB in the year 1800 and then sum the area trawled.
# Assign gears to the VMS data, then select TBB pings in 1800 and sum the swept area
tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)]
tacsatp %>% filter(year(SI_DATIM)==1800, LE_GEAR == "TBB") %>% summarize(AreaTrawled=sum(TR_AREA,na.rm=T))
- We will do this by re-using the code provided above but making an additional selection now for gear type. That will give us, within our grid, the swept areas which we can then check against the surface area of the gridcell itself (of which we take half).
# Reset the per-year totals before re-aggregating for TBB only
grd$TR_AREA_1800 <- 0
grd$TR_AREA_1801 <- 0
for(iYr in c(1800:1801)){
subTacsat <- subset(tacsatp,year(SI_DATIM)==iYr & LE_GEAR == "TBB")
sp <- st_as_sf(subTacsat,coords=c("SI_LONG","SI_LATI"),crs=st_crs(4326))
#The spatial grid dataframe is a long list (nrow(spgDF@data)) and the idx tells me which row number (associated with a grid cell) the VMS ping is located in.
idx <- st_over(sp,grd)
# Sum the swept area of all pings falling in the same grid cell
agg <- aggregate(subTacsat$TR_AREA, by=list(idx),FUN=sum,na.rm=TRUE)
colnames(agg) <- c("grID","TR_AREA")
#- add the data to the grid object (grep picks the TR_AREA_<year> column)
grd[agg$grID,grep(iYr,colnames(grd))] <- agg$TR_AREA
}
# Count the cells where trawling intensity exceeds 0.5 (swept area > half the
# cell surface). The question asks "how many", so wrap the index vector in length();
# the original returned the cell indices themselves.
length(which(an(st_drop_geometry(grd)$TR_AREA_1800) > (0.5*an(st_drop_geometry(grd)$surface))))
- VMStools introductory (i) course (5-9th of June 2023)
- Pi1: Getting the VMS and logbook data into R
- Pi2: Cleaning and processing the VMS and logbook data, and accounting for potential problems
- Pi3: Linking VMS and logbook data and exploring the benefits
- Pi4: How to link VMS, logbook data to spatial grids
- Pi5: Interpolation methods for VMS tracks
- Pi6: Calculating indicators at different spatial scales
- Pi7: The ICES datacall and working together across countries
- Pi8: How to link VMS data with other spatial datasets
- Pi9: Making pictures and tables
- VMS advanced (a) course (8-12 February 2021)
- VMStools workflow example scripts
- Benthis WP2