This script, derived from the 2016/17 BTO scripts, incorporates the code updates required for 2021. The script

  • Calculates and plots the relative abundance and six yearly mean relative abundances trends for each breeding species, across each OSPAR region and subregion
  • Plots the species latest six year mean relative abundance value compared to its threshold value and compares this latest threshold status to a previous years threshold status, to produce a traffic light plot for the species within the OSPAR regions and birds subdivisions.

Set the year range, add the regions and sub region labels into vectors and create an empty dat_sum list to store the species+regions/subregions yearly and six yearly mean relative abundances.

startYearBreeding <- list(
  Im = 1991,
  In = 1991,
  Io = 1991,
  I = 1991,
  IIa = 1991,
  IIb = 1991,
  IIc = 1991,
  IId = 1991,
  IIdw = 1991, # Missing Wadden sea counts have effect on species trend
  IIe = 1991,
  IIf = 1991,
  II = 1991,
  IIw = 1991, # Missing Wadden sea counts have effect on species trend
  III = 1991,
  IV = 1991
)
endYearBreeding <- list(
  Im = 2020,
  In = 2020,
  Io = 2020,
  I = 2020,
  IIa = 2019,
  IIb = 2020,
  IIc = 2020,
  IId = 2019,
  IIdw = 2017, # Missing Wadden sea counts have effect on species trend
  IIe = 2019,
  IIf = 2019,
  II = 2019,
  IIw = 2017, # Missing Wadden sea counts have effect on species trend
  III = 2019,
  IV = 2016
)

# Previous assessment year used to compare threshold category changes in traffic light plots
previousYearBreeding <- 2014

# species where reduced Wadden year range 1991 - 2017 should be used instead of 1991 -2019
wadden_species <- c("Arctic_tern", "Barnacle_goose", "Black_headed_gull", "Common_gull", "Common_tern", "Eurasian_spoonbill", "Great_black_backed_gull", "Great_cormorant", "European_herring_gull", "Kentish_plover", "Lesser_black_backed_gull", "Little_tern", "Mediterranean_gull", "Eurasian_oystercatcher", "Pied_avocet", "Common_ringed_plover", "Sandwich_tern")

# list of OSPAR regions and bird subdivision
region_labels <- c("I", "II", "III", "IV")
sub_region_labels <- c("Im", "In", "Io", "IIa", "IIb", "IIc", "IId", "IIe", "IIf")
region_sub_region_labels <- c(region_labels, sub_region_labels)

# empty list to store species relative abundance values for regions and sub regions
dat_unsm <-list()

Create the directory structure for the results

# create directory for output data
dir.create(path("results"))
dir.create(path("results", "breeding_out"))
dir.create(path("results", "breeding_baseline_model"))

# and directory for EMECO Validation datasets
dir.create(path("results", "EMECO checks"))
dir.create(path("results", "EMECO checks", "breeding_out"))
dir.create(path("results", "EMECO checks", "species data"))

Import the species and sites information

# get .csv file containing species names & number of eggs
add.info = read.table(path("data", "number of eggs.csv"), header = TRUE, sep = ",",
                      stringsAsFactors = FALSE)

# standardise common names with underscores and not spaces or dashes in name
add.info$Common_name = str_replace_all(add.info$Common_name, "[:space:]|-", "_")

# bring in sites list to get ospar regions/sub-regions
sites = read.csv(path("data", "BreedingSites ABUNDANCE.csv"), header = TRUE)

Import the species yearly relative abundance counts with one data frame for each species+region eg Arctic_Skua_OSPARI, Arctic_Skua_OSPARII, Arctic_Skua_OSPARIII etc.

# get list of files containing breeding success data
files.list = list.files(path("data", "breeding"))
max_year_range <- full_seq(c(reduce(startYearBreeding, min), 
                           reduce(endYearBreeding, max)), 1)

# work out which files came from seabird wizard, change so they only have 3 
## columns (Year, Colony, Count), make sure all are formatted the same way & read them in
for(i in 1:length(files.list)){
  
  dat = read.table(path("data", "breeding", files.list[i]), header = TRUE, sep = ",",
                   stringsAsFactors = FALSE)
  if(ncol(dat) == 5)    {   
    dat = dat[,c(3,2,4)]
    names(dat) = c("Year", "Colony", "Count")
  }
  
  # standardise common names with underscores and not spaces or dashes in name
  new_name = str_remove(files.list[i], ".csv")
  new_name = str_replace_all(new_name, "[:space:]|-", "_")
  assign(new_name, dat)
}

Remove species in regions / sub-regions failing analysis following imputation using the GAM method

Common_eider_OSPARII <- Common_eider_OSPARII %>%
  filter(Colony != "OSPARIId")
rm(Manx_shearwater_OSPARIII)
files.list <- files.list %>% 
  discard(function(x) {x == "Manx_shearwater_OSPARIII.csv"})
rm(European_storm_petrel_OSPARIII)
files.list <- files.list %>% 
  discard(function(x) {x == "European_storm-petrel_OSPARIII.csv"})
rm(Atlantic_puffin_OSPARIII)
files.list <- files.list %>% 
  discard(function(x) {x == "Atlantic_puffin_OSPARIII.csv"})

From the list of species yearly relative abundance counts dataframes, in data.list vector, extract the list of species into a species vector.

# get unique species names
data.list = str_remove(files.list, ".csv")
data.list = str_replace_all(data.list, "[:space:]|-", "_")
species = unique(sapply(strsplit(data.list,"_OSPAR"), "[", 1))

For each species

  • set the target threshold, depending on number of eggs species lays
  • combine the abundance counts across all regions in to new_dat dataframe, converting to wide format with years as columns and exporting as csv file, eg Arctic_Skua.csv
  • export the species abundance data eg Arctic_Skua_breeding.csv
  • calculate the yearly and six yearly mean relative abundances and plotting relative abundance trends by running calculate_breeding_relative_abundance function for each region.
  • calculate the yearly and six yearly mean relative abundances and plotting relative abundance trends by running calculate_breeding_relative_abundance function for each sub region.
  • yearly and six year mean relative abundance values for each species+region/sub region added to dat_unsm data frame.
# log setting baseline regression results
luzlogr::openlog(path("results", "breeding_baseline_model", "Breeding Seabirds setting baseline.csv"))
luzlogr::printlog("species,region,count_flag,p-value,method,baseline_count,first_relative_abundance", ts = FALSE)

for(i in 1:length(species)){

  # create species directory
  dir.create(path("results", "breeding_out", species[i]))
  dir.create(path("results", "breeding_baseline_model", species[i]))
  
  # get additional info
  species.info = subset(add.info, Common_name == species[i])
  
  # get number of regions for species
  data = grep(str_c("^", species[i],"_O"), data.list)
  
  # set target 0.7 if species lays >1 egg, 0.8 if species lays 1 egg
  ifelse(species.info$Eggs[1] == 1, target <- 0.8, target <- 0.7)
  
  # combine data from each region data frame
  dat <- list()
  for (j in 1:length(data)) {
    dat[[j]] <- get(data.list[data[j]]) 
  }
  new_dat <- bind_rows(dat)

  # convert abundance count data to wide format with each year a column and add
  ## site information columns
  new_dat = cast(new_dat, Colony ~ Year, value.var = Count, fun.aggregate = mean)
  new_dat = merge(new_dat, sites, all.x = TRUE)
  
  # export species data
  write.csv(new_dat, path("results", "EMECO checks", "species data", str_glue("{species[i]}_breeding.csv")), row.names = FALSE)
  
  # calculate and plot relative abundance values for regions
  for(j in as.numeric(as.roman(region_labels))) {

    # Update start and end year for each OSPAR REGION
    if (j == 1) {
      startYear = startYearBreeding$I
      endYear = endYearBreeding$I
    } else if (j ==2 & !species[i] %in% wadden_species) {
      startYear = startYearBreeding$II
      endYear = endYearBreeding$II
    } else if (j ==2 & species[i] %in% wadden_species) {
      startYear = startYearBreeding$IIw
      endYear = endYearBreeding$IIw
    } else if (j == 3) {
      startYear = startYearBreeding$III
      endYear = endYearBreeding$III
    } else if (j == 4) {
      startYear = startYearBreeding$IV
      endYear = endYearBreeding$IV
    }
    
    dat_unsm[[str_glue("{species[i]}_{as.roman(j)}")]] <-
      calculate_breeding_relative_abundance(ospar = subset(new_dat, OSPAR_REGION == j),
                                            label = as.roman(j),
                                            species_name = species[i],
                                            target_threshold = target,
                                            start_year = startYear,
                                            end_year = endYear)
  }
  
  ## calculate and plot relative abundance values for sub regions
  
  for(j in sub_region_labels) {
    
    # Update start and end year for each OSPAR SUB REGION
    if (j == "Im") {                      
      endYear = endYearBreeding$Im
      startYear = startYearBreeding$Im
    } else if (j == "In") {
      endYear = endYearBreeding$In
      startYear = startYearBreeding$In
    } else if (j == "Io") {
      endYear = endYearBreeding$Io
      startYear = startYearBreeding$Io
    } else if (j == "IIa") {
      endYear = endYearBreeding$IIa
      startYear = startYearBreeding$IIa
    } else if (j == "IIb") {
      endYear = endYearBreeding$IIb
      startYear = startYearBreeding$IIb
    } else if (j == "IIc") {
      endYear = endYearBreeding$IIc
      startYear = startYearBreeding$IIc
    } else if (j == "IId" & !species[i] %in% wadden_species) {
      endYear = endYearBreeding$IId
      startYear = startYearBreeding$IId
    } else if (j == "IId" & species[i] %in% wadden_species) {
      endYear = endYearBreeding$IIdw
      startYear = startYearBreeding$IIdw
    } else if (j == "IIe") {
      endYear = endYearBreeding$IIe
      startYear = startYearBreeding$IIe
    } else if (j == "IIf") {
      endYear = endYearBreeding$IIf
      startYear = startYearBreeding$IIf
    }
    
    dat_unsm[[str_glue("{species[i]}_{j}")]] <- 
      calculate_breeding_relative_abundance(ospar = subset(new_dat, SUBADMIN == j), 
                                            label = j,
                                            species_name = species[i],
                                            target_threshold = target,
                                            start_year = startYear,
                                            end_year = endYear)
  }
}
dat_unsm <- bind_rows(dat_unsm)

# close baseline regression results log
luzlogr::closelog(sessionInfo = FALSE)

Splits and exports dat_unsm dataframe into region and sub region dataframes with species yearly relative abundance values as columns eg OSPARI_unsm_all, OSPARIm_unsm_all, OSPARII_unsm_all

# create yearly relative abundance data frames for each region and sub region
for(i in region_sub_region_labels) {
  
  # get yearly relative abundance values for the region/subregion
  dat_unsm_label <- dat_unsm[dat_unsm$region == str_glue("OSPAR{i}"),]
  dat_unsm_label <- dat_unsm_label[,c("years", "index", "species")]
  dat_unsm_label <- dat_unsm_label %>% 
    pivot_wider(names_from = species, values_from = index)
  dat_unsm_label <- dat_unsm_label[order(dat_unsm_label$years),]
  
  # export the yearly relative abundance values for the region/subregion as a csv file
  write.csv(dat_unsm_label, path("results", "EMECO checks", "breeding_out",
                                 str_glue("OSPAR{i}_unsm_all.csv")), row.names = FALSE)
  
  # rename dataframe with region/sub region name
  assign(str_glue("OSPAR{i}_unsm_all"), dat_unsm_label)
}

Splits and exports dat_unsm dataframe into region and sub region dataframes with species six yearly mean relative abundance values as columns eg OSPARI_unsm_all_mean, OSPARIm_unsm_all_mean, OSPARII_unsm_all_mean

# create six year mean relative abundance data frames for each region and sub region
for(i in region_sub_region_labels) {
  
  # get six year mean relative abundance values for the region/subregion
  dat_unsm_mean_label <- dat_unsm[dat_unsm$region == str_glue("OSPAR{i}"),]
  dat_unsm_mean_label <- dat_unsm_mean_label[,c("years", "six_year_mean", "species")]
  dat_unsm_mean_label <- dat_unsm_mean_label %>% 
    pivot_wider(names_from = species, values_from = six_year_mean)
  dat_unsm_mean_label <- dat_unsm_mean_label[order(dat_unsm_mean_label$years),]
  
  # export the six year mean relative abundance values for the region/subregion as a csv file
  write.csv(dat_unsm_mean_label, path("results", "EMECO checks", "breeding_out",
                                      str_glue("OSPAR{i}_unsm_all_mean.csv")), row.names = FALSE)
  
  # rename dataframe with region/sub region name
  assign(str_glue("OSPAR{i}_unsm_all_mean"), dat_unsm_mean_label)
}

Tidy up removing all objects from global environment except object required in the subsequent analyses

  • yearly relative abundance values in region/sub region dataframes eg OSPARI_unsm_all, OSPARIm_unsm_all, OSPARII_unsm_all
  • six yearly mean relative abundance values in region/sub region dataframes eg OSPARI_unsm_all_mean, OSPARIm_unsm_all_mean, OSPARII_unsm_all_mean
# tidy up
unsm_all_mean <- vector()
for (i in region_sub_region_labels) {
  unsm_all_mean[i] <- c(str_glue("OSPAR{i}_unsm_all_mean"))
}

required_data <- c("species", "add.info", "endYearBreeding", "previousYearBreeding",
                   "region_labels", "sub_region_labels", "region_sub_region_labels",
                   "wadden_species")

rm(list=ls()[!ls() %in% c(unsm_all_mean, required_data)])

Combine all the breeding species’ six year relative abundance means for all the regions and subregions into one data frame, ByRegion.dat.mean

Region <- c(str_glue("OSPAR{region_sub_region_labels}_unsm_all_mean"))
dat <- list()
for (i in 1:length(Region)) {
  Region.dat <- get(Region[[i]])
  Region.dat$region <- str_remove(Region[[i]], "_unsm_all_mean")
  dat[[i]] <- Region.dat
}
ByRegion.dat.mean <- bind_rows(dat)

From ByRegion.dat.mean get the latest six year relative abundance mean for each species as rows and OSPAR region / sub-region as columns including species number of eggs and functional group. Sorted by functional group and species scientific name.

# select the latest year for each region
ByRegion.dat.latest.mean.I <- subset(ByRegion.dat.mean, 
                                     region == "OSPARI" & years == endYearBreeding$I)
ByRegion.dat.latest.mean.Im <- subset(ByRegion.dat.mean, 
                                      region == "OSPARIm" & years == endYearBreeding$Im)
ByRegion.dat.latest.mean.In <- subset(ByRegion.dat.mean, 
                                      region == "OSPARIn" & years == endYearBreeding$In)
ByRegion.dat.latest.mean.Io <- subset(ByRegion.dat.mean, 
                                      region == "OSPARIo" & years == endYearBreeding$Io)
ByRegion.dat.latest.mean.II <- subset(ByRegion.dat.mean[!(colnames(ByRegion.dat.mean) %in% wadden_species)], 
                                      region == "OSPARII" & years == endYearBreeding$II)
ByRegion.dat.latest.mean.IIw <- subset(ByRegion.dat.mean[, c("years", "region", wadden_species)], 
                                       region == "OSPARII" & years == endYearBreeding$IIw)
ByRegion.dat.latest.mean.IIa <- subset(ByRegion.dat.mean, 
                                       region == "OSPARIIa" & years == endYearBreeding$IIa)
ByRegion.dat.latest.mean.IIb <- subset(ByRegion.dat.mean, 
                                       region == "OSPARIIb" & years == endYearBreeding$IIb)
ByRegion.dat.latest.mean.IIc <- subset(ByRegion.dat.mean, 
                                       region == "OSPARIIc" & years == endYearBreeding$IIc)
ByRegion.dat.latest.mean.IId <- subset(ByRegion.dat.mean[!(colnames(ByRegion.dat.mean) %in% wadden_species)], 
                                       region == "OSPARIId" & years == endYearBreeding$IId)
ByRegion.dat.latest.mean.IIdw <- subset(ByRegion.dat.mean[, c("years", "region", wadden_species)], 
                                        region == "OSPARIId" & years == endYearBreeding$IIdw)
ByRegion.dat.latest.mean.IIe <- subset(ByRegion.dat.mean, 
                                       region == "OSPARIIe" & years == endYearBreeding$IIe)
ByRegion.dat.latest.mean.IIf <- subset(ByRegion.dat.mean, 
                                       region == "OSPARIIf" & years == endYearBreeding$IIf)
ByRegion.dat.latest.mean.III <- subset(ByRegion.dat.mean, 
                                       region == "OSPARIII" & years == endYearBreeding$III)
ByRegion.dat.latest.mean.IV <- subset(ByRegion.dat.mean, 
                                      region == "OSPARIV" & years == endYearBreeding$IV)
# ByRegion.dat.latest.mean.V <- subset(ByRegion.dat.mean,
#                                      region == "OSPARV" & years == endYearBreeding$V)

ByRegion.dat.latest.mean <- bind_rows(ByRegion.dat.latest.mean.I,
                                      ByRegion.dat.latest.mean.Im,
                                      ByRegion.dat.latest.mean.In,
                                      ByRegion.dat.latest.mean.Io,
                                      ByRegion.dat.latest.mean.II,
                                      ByRegion.dat.latest.mean.IIw,
                                      ByRegion.dat.latest.mean.IIa,
                                      ByRegion.dat.latest.mean.IIb,
                                      ByRegion.dat.latest.mean.IIc,
                                      ByRegion.dat.latest.mean.IId,
                                      ByRegion.dat.latest.mean.IIdw,
                                      ByRegion.dat.latest.mean.IIe,
                                      ByRegion.dat.latest.mean.IIf,
                                      ByRegion.dat.latest.mean.III, 
                                      ByRegion.dat.latest.mean.IV)
                                      # ByRegion.dat.latest.mean.V)

ByRegion.dat.latest.mean <- subset(ByRegion.dat.latest.mean, select = -c(years))

# pivot data frame so species in row and regions+sub_regions in columns
ByRegion.dat.latest.mean <- ByRegion.dat.latest.mean %>%
  pivot_longer(names_to = "species", cols = -region) %>%
  filter(!is.na(value)) %>% 
  pivot_wider(names_from = "region", values_from = value)

# add species information and sort by functional group and species name
ByRegion.dat.latest.mean <- merge(ByRegion.dat.latest.mean, add.info, by.x = "species", by.y = "Common_name", all.x = TRUE)
ByRegion.dat.latest.mean <- ByRegion.dat.latest.mean[order(ByRegion.dat.latest.mean$Fun_Group, ByRegion.dat.latest.mean$Species),]

Export species+region/subregion latest six year relative abundance means

write.csv(ByRegion.dat.latest.mean, 
          path("results", "EMECO checks", "breeding_out", "Breeding Seabirds Indicator Latest Six Year Mean.csv"), row.names = FALSE)

From ByRegion.dat.mean get the previous six year relative abundance mean for each species as rows and OSPAR region / sub-region as columns including species number of eggs and functional group. Sorted by functional group and species scientific name.

# select the intermediate year for each region
ByRegion.dat.previous.mean.I <- subset(ByRegion.dat.mean, 
                              region == "OSPARI" & years == previousYearBreeding)
ByRegion.dat.previous.mean.Im <- subset(ByRegion.dat.mean, 
                              region == "OSPARIm" & years == previousYearBreeding)
ByRegion.dat.previous.mean.In <- subset(ByRegion.dat.mean, 
                              region == "OSPARIn" & years == previousYearBreeding)
ByRegion.dat.previous.mean.Io <- subset(ByRegion.dat.mean, 
                              region == "OSPARIo" & years == previousYearBreeding)
ByRegion.dat.previous.mean.II <- subset(ByRegion.dat.mean[!(colnames(ByRegion.dat.mean) %in% wadden_species)], 
                              region == "OSPARII" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIw <- subset(ByRegion.dat.mean[, c("years", "region", wadden_species)], 
                              region == "OSPARII" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIa <- subset(ByRegion.dat.mean, 
                              region == "OSPARIIa" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIb <- subset(ByRegion.dat.mean, 
                              region == "OSPARIIb" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIc <- subset(ByRegion.dat.mean, 
                              region == "OSPARIIc" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IId <- subset(ByRegion.dat.mean[!(colnames(ByRegion.dat.mean) %in% wadden_species)], 
                              region == "OSPARIId" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIdw <- subset(ByRegion.dat.mean[, c("years", "region", wadden_species)], 
                              region == "OSPARIId" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIe <- subset(ByRegion.dat.mean, 
                              region == "OSPARIIe" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIf <- subset(ByRegion.dat.mean, 
                              region == "OSPARIIf" & years == previousYearBreeding)
ByRegion.dat.previous.mean.III <- subset(ByRegion.dat.mean, 
                              region == "OSPARIII" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IV <- subset(ByRegion.dat.mean, 
                              region == "OSPARIV" & years == previousYearBreeding)
# ByRegion.dat.previous.mean.V <- subset(ByRegion.dat.mean,
#                               region == "OSPARV" & years == previousYearBreeding)

ByRegion.dat.previous.mean <- bind_rows(ByRegion.dat.previous.mean.I,
                               ByRegion.dat.previous.mean.Im,
                               ByRegion.dat.previous.mean.In,
                               ByRegion.dat.previous.mean.Io,
                               ByRegion.dat.previous.mean.II,
                               ByRegion.dat.previous.mean.IIw,
                               ByRegion.dat.previous.mean.IIa,
                               ByRegion.dat.previous.mean.IIb,
                               ByRegion.dat.previous.mean.IIc,
                               ByRegion.dat.previous.mean.IId,
                               ByRegion.dat.previous.mean.IIdw,
                               ByRegion.dat.previous.mean.IIe,
                               ByRegion.dat.previous.mean.IIf,
                               ByRegion.dat.previous.mean.III, 
                               ByRegion.dat.previous.mean.IV)
                               # ByRegion.dat.previous.mean.V)

ByRegion.dat.previous.mean <- subset(ByRegion.dat.previous.mean, select = -c(years))

# pivot data frame so species in row and regions+sub_regions in columns
ByRegion.dat.previous.mean <- ByRegion.dat.previous.mean %>%
  pivot_longer(names_to = "species", cols = -region) %>%
  filter(!is.na(value)) %>% 
  pivot_wider(names_from = "region", values_from = value)

# OSPARV missing as no species in previous assessment year
if(!any(names(ByRegion.dat.previous.mean) == "OSPARV")) {
  ByRegion.dat.previous.mean <- ByRegion.dat.previous.mean %>% 
    mutate(OSPARV = NA_integer_, .after = everything())
}

# add species information and sort by functional group and species name
ByRegion.dat.previous.mean <- merge(ByRegion.dat.previous.mean, add.info, by.x = "species", by.y = "Common_name", all.x = TRUE)
ByRegion.dat.previous.mean <- ByRegion.dat.previous.mean[order(ByRegion.dat.previous.mean$Fun_Group, ByRegion.dat.previous.mean$Species),]

Export species+region/subregion previous six year relative abundance means

write.csv(ByRegion.dat.previous.mean, 
          path("results", "EMECO checks", "breeding_out", 
               str_glue("Breeding Seabirds Indicator {previousYearBreeding} Six Year Mean.csv")), row.names = FALSE)

Create latest breeding assessment traffic light plots, with colour indicating if latest six year mean is above or below the relative abundance threshold for the species and arrows indicating if there has been a change in threshold status between the latest year and previous year (= 2014) assessments