breeding_seabird_trend_2021.Rmd
This script, derived from the 2016/17 BTO scripts, incorporates the code updates required for 2021. The script
suppressPackageStartupMessages({
library(b1indicator)
library(reshape)
library(stringr)
library(dplyr)
library(purrr)
library(tidyr)
library(fs)
library(luzlogr)
})
Set the year range, add the regions and sub region labels into vectors and create an empty dat_sum list to store the species+regions/subregions yearly and six yearly mean relative abundances.
startYearBreeding <- list(
Im = 1991,
In = 1991,
Io = 1991,
I = 1991,
IIa = 1991,
IIb = 1991,
IIc = 1991,
IId = 1991,
IIdw = 1991, # Missing Wadden sea counts have effect on species trend
IIe = 1991,
IIf = 1991,
II = 1991,
IIw = 1991, # Missing Wadden sea counts have effect on species trend
III = 1991,
IV = 1991
)
endYearBreeding <- list(
Im = 2020,
In = 2020,
Io = 2020,
I = 2020,
IIa = 2019,
IIb = 2020,
IIc = 2020,
IId = 2019,
IIdw = 2017, # Missing Wadden sea counts have effect on species trend
IIe = 2019,
IIf = 2019,
II = 2019,
IIw = 2017, # Missing Wadden sea counts have effect on species trend
III = 2019,
IV = 2016
)
# Previous assessment year used to compare threshold category changes in traffic light plots
previousYearBreeding <- 2014
# species where reduced Wadden year range 1991 - 2017 should be used instead of 1991 -2019
wadden_species <- c("Arctic_tern", "Barnacle_goose", "Black_headed_gull", "Common_gull", "Common_tern", "Eurasian_spoonbill", "Great_black_backed_gull", "Great_cormorant", "European_herring_gull", "Kentish_plover", "Lesser_black_backed_gull", "Little_tern", "Mediterranean_gull", "Eurasian_oystercatcher", "Pied_avocet", "Common_ringed_plover", "Sandwich_tern")
# list of OSPAR regions and bird subdivision
region_labels <- c("I", "II", "III", "IV")
sub_region_labels <- c("Im", "In", "Io", "IIa", "IIb", "IIc", "IId", "IIe", "IIf")
region_sub_region_labels <- c(region_labels, sub_region_labels)
# empty list to store species relative abundance values for regions and sub regions
dat_unsm <-list()
Create the directory structure for the results
# create directory for output data
dir.create(path("results"))
dir.create(path("results", "breeding_out"))
dir.create(path("results", "breeding_baseline_model"))
# and directory for EMECO Validation datasets
dir.create(path("results", "EMECO checks"))
dir.create(path("results", "EMECO checks", "breeding_out"))
dir.create(path("results", "EMECO checks", "species data"))
Import the species and sites information
# get .csv file containing species names & number of eggs
add.info = read.table(path("data", "number of eggs.csv"), header = TRUE, sep = ",",
stringsAsFactors = FALSE)
# standardise common names with underscores and not spaces or dashes in name
add.info$Common_name = str_replace_all(add.info$Common_name, "[:space:]|-", "_")
# bring in sites list to get ospar regions/sub-regions
sites = read.csv(path("data", "BreedingSites ABUNDANCE.csv"), header = TRUE)
Import the species yearly relative abundance counts with one data frame for each species+region eg Arctic_Skua_OSPARI, Arctic_Skua_OSPARII, Arctic_Skua_OSPARIII etc.
# get list of files containing breeding success data
files.list = list.files(path("data", "breeding"))
max_year_range <- full_seq(c(reduce(startYearBreeding, min),
reduce(endYearBreeding, max)), 1)
# work out which files came from seabird wizard, change so they only have 3
## columns (Year, Colony, Count), make sure all are formatted the same way & read them in
for(i in 1:length(files.list)){
dat = read.table(path("data", "breeding", files.list[i]), header = TRUE, sep = ",",
stringsAsFactors = FALSE)
if(ncol(dat) == 5) {
dat = dat[,c(3,2,4)]
names(dat) = c("Year", "Colony", "Count")
}
# standardise common names with underscores and not spaces or dashes in name
new_name = str_remove(files.list[i], ".csv")
new_name = str_replace_all(new_name, "[:space:]|-", "_")
assign(new_name, dat)
}
Remove species in regions / sub-regions failing analysis following imputation using the GAM method
Common_eider_OSPARII <- Common_eider_OSPARII %>%
filter(Colony != "OSPARIId")
rm(Manx_shearwater_OSPARIII)
files.list <- files.list %>%
discard(function(x) {x == "Manx_shearwater_OSPARIII.csv"})
rm(European_storm_petrel_OSPARIII)
files.list <- files.list %>%
discard(function(x) {x == "European_storm-petrel_OSPARIII.csv"})
rm(Atlantic_puffin_OSPARIII)
files.list <- files.list %>%
discard(function(x) {x == "Atlantic_puffin_OSPARIII.csv"})
From the list of species yearly relative abundance counts dataframes, in data.list vector, extract the list of species into a species vector.
# get unique species names
data.list = str_remove(files.list, ".csv")
data.list = str_replace_all(data.list, "[:space:]|-", "_")
species = unique(sapply(strsplit(data.list,"_OSPAR"), "[", 1))
For each species
# log setting baseline regression results
luzlogr::openlog(path("results", "breeding_baseline_model", "Breeding Seabirds setting baseline.csv"))
luzlogr::printlog("species,region,count_flag,p-value,method,baseline_count,first_relative_abundance", ts = FALSE)
for(i in 1:length(species)){
# create species directory
dir.create(path("results", "breeding_out", species[i]))
dir.create(path("results", "breeding_baseline_model", species[i]))
# get additional info
species.info = subset(add.info, Common_name == species[i])
# get number of regions for species
data = grep(str_c("^", species[i],"_O"), data.list)
# set target 0.7 if species lays >1 egg, 0.8 if species lays 1 egg
ifelse(species.info$Eggs[1] == 1, target <- 0.8, target <- 0.7)
# combine data from each region data frame
dat <- list()
for (j in 1:length(data)) {
dat[[j]] <- get(data.list[data[j]])
}
new_dat <- bind_rows(dat)
# convert abundance count data to wide format with each year a column and add
## site information columns
new_dat = cast(new_dat, Colony ~ Year, value.var = Count, fun.aggregate = mean)
new_dat = merge(new_dat, sites, all.x = TRUE)
# export species data
write.csv(new_dat, path("results", "EMECO checks", "species data", str_glue("{species[i]}_breeding.csv")), row.names = FALSE)
# calculate and plot relative abundance values for regions
for(j in as.numeric(as.roman(region_labels))) {
# Update start and end year for each OSPAR REGION
if (j == 1) {
startYear = startYearBreeding$I
endYear = endYearBreeding$I
} else if (j ==2 & !species[i] %in% wadden_species) {
startYear = startYearBreeding$II
endYear = endYearBreeding$II
} else if (j ==2 & species[i] %in% wadden_species) {
startYear = startYearBreeding$IIw
endYear = endYearBreeding$IIw
} else if (j == 3) {
startYear = startYearBreeding$III
endYear = endYearBreeding$III
} else if (j == 4) {
startYear = startYearBreeding$IV
endYear = endYearBreeding$IV
}
dat_unsm[[str_glue("{species[i]}_{as.roman(j)}")]] <-
calculate_breeding_relative_abundance(ospar = subset(new_dat, OSPAR_REGION == j),
label = as.roman(j),
species_name = species[i],
target_threshold = target,
start_year = startYear,
end_year = endYear)
}
## calculate and plot relative abundance values for sub regions
for(j in sub_region_labels) {
# Update start and end year for each OSPAR SUB REGION
if (j == "Im") {
endYear = endYearBreeding$Im
startYear = startYearBreeding$Im
} else if (j == "In") {
endYear = endYearBreeding$In
startYear = startYearBreeding$In
} else if (j == "Io") {
endYear = endYearBreeding$Io
startYear = startYearBreeding$Io
} else if (j == "IIa") {
endYear = endYearBreeding$IIa
startYear = startYearBreeding$IIa
} else if (j == "IIb") {
endYear = endYearBreeding$IIb
startYear = startYearBreeding$IIb
} else if (j == "IIc") {
endYear = endYearBreeding$IIc
startYear = startYearBreeding$IIc
} else if (j == "IId" & !species[i] %in% wadden_species) {
endYear = endYearBreeding$IId
startYear = startYearBreeding$IId
} else if (j == "IId" & species[i] %in% wadden_species) {
endYear = endYearBreeding$IIdw
startYear = startYearBreeding$IIdw
} else if (j == "IIe") {
endYear = endYearBreeding$IIe
startYear = startYearBreeding$IIe
} else if (j == "IIf") {
endYear = endYearBreeding$IIf
startYear = startYearBreeding$IIf
}
dat_unsm[[str_glue("{species[i]}_{j}")]] <-
calculate_breeding_relative_abundance(ospar = subset(new_dat, SUBADMIN == j),
label = j,
species_name = species[i],
target_threshold = target,
start_year = startYear,
end_year = endYear)
}
}
dat_unsm <- bind_rows(dat_unsm)
# close baseline regression results log
luzlogr::closelog(sessionInfo = FALSE)
Splits and exports dat_unsm dataframe into region and sub region dataframes with species yearly relative abundance values as columns eg OSPARI_unsm_all, OSPARIm_unsm_all, OSPARII_unsm_all
# create yearly relative abundance data frames for each region and sub region
for(i in region_sub_region_labels) {
# get yearly relative abundance values for the region/subregion
dat_unsm_label <- dat_unsm[dat_unsm$region == str_glue("OSPAR{i}"),]
dat_unsm_label <- dat_unsm_label[,c("years", "index", "species")]
dat_unsm_label <- dat_unsm_label %>%
pivot_wider(names_from = species, values_from = index)
dat_unsm_label <- dat_unsm_label[order(dat_unsm_label$years),]
# export the yearly relative abundance values for the region/subregion as a csv file
write.csv(dat_unsm_label, path("results", "EMECO checks", "breeding_out",
str_glue("OSPAR{i}_unsm_all.csv")), row.names = FALSE)
# rename dataframe with region/sub region name
assign(str_glue("OSPAR{i}_unsm_all"), dat_unsm_label)
}
Splits and exports dat_unsm dataframe into region and sub region dataframes with species six yearly mean relative abundance values as columns eg OSPARI_unsm_all_mean, OSPARIm_unsm_all_mean, OSPARII_unsm_all_mean
# create six year mean relative abundance data frames for each region and sub region
for(i in region_sub_region_labels) {
# get six year mean relative abundance values for the region/subregion
dat_unsm_mean_label <- dat_unsm[dat_unsm$region == str_glue("OSPAR{i}"),]
dat_unsm_mean_label <- dat_unsm_mean_label[,c("years", "six_year_mean", "species")]
dat_unsm_mean_label <- dat_unsm_mean_label %>%
pivot_wider(names_from = species, values_from = six_year_mean)
dat_unsm_mean_label <- dat_unsm_mean_label[order(dat_unsm_mean_label$years),]
# export the six year mean relative abundance values for the region/subregion as a csv file
write.csv(dat_unsm_mean_label, path("results", "EMECO checks", "breeding_out",
str_glue("OSPAR{i}_unsm_all_mean.csv")), row.names = FALSE)
# rename dataframe with region/sub region name
assign(str_glue("OSPAR{i}_unsm_all_mean"), dat_unsm_mean_label)
}
Tidy up removing all objects from global environment except object required in the subsequent analyses
# tidy up
unsm_all_mean <- vector()
for (i in region_sub_region_labels) {
unsm_all_mean[i] <- c(str_glue("OSPAR{i}_unsm_all_mean"))
}
required_data <- c("species", "add.info", "endYearBreeding", "previousYearBreeding",
"region_labels", "sub_region_labels", "region_sub_region_labels",
"wadden_species")
rm(list=ls()[!ls() %in% c(unsm_all_mean, required_data)])
Combine all the breeding species’ six year relative abundance means for all the regions and subregions into one data frame, ByRegion.dat.mean
Region <- c(str_glue("OSPAR{region_sub_region_labels}_unsm_all_mean"))
dat <- list()
for (i in 1:length(Region)) {
Region.dat <- get(Region[[i]])
Region.dat$region <- str_remove(Region[[i]], "_unsm_all_mean")
dat[[i]] <- Region.dat
}
ByRegion.dat.mean <- bind_rows(dat)
From ByRegion.dat.mean get the latest six year relative abundance mean for each species as rows and OSPAR region / sub-region as columns including species number of eggs and functional group. Sorted by functional group and species scientific name.
# select the latest year for each region
ByRegion.dat.latest.mean.I <- subset(ByRegion.dat.mean,
region == "OSPARI" & years == endYearBreeding$I)
ByRegion.dat.latest.mean.Im <- subset(ByRegion.dat.mean,
region == "OSPARIm" & years == endYearBreeding$Im)
ByRegion.dat.latest.mean.In <- subset(ByRegion.dat.mean,
region == "OSPARIn" & years == endYearBreeding$In)
ByRegion.dat.latest.mean.Io <- subset(ByRegion.dat.mean,
region == "OSPARIo" & years == endYearBreeding$Io)
ByRegion.dat.latest.mean.II <- subset(ByRegion.dat.mean[!(colnames(ByRegion.dat.mean) %in% wadden_species)],
region == "OSPARII" & years == endYearBreeding$II)
ByRegion.dat.latest.mean.IIw <- subset(ByRegion.dat.mean[, c("years", "region", wadden_species)],
region == "OSPARII" & years == endYearBreeding$IIw)
ByRegion.dat.latest.mean.IIa <- subset(ByRegion.dat.mean,
region == "OSPARIIa" & years == endYearBreeding$IIa)
ByRegion.dat.latest.mean.IIb <- subset(ByRegion.dat.mean,
region == "OSPARIIb" & years == endYearBreeding$IIb)
ByRegion.dat.latest.mean.IIc <- subset(ByRegion.dat.mean,
region == "OSPARIIc" & years == endYearBreeding$IIc)
ByRegion.dat.latest.mean.IId <- subset(ByRegion.dat.mean[!(colnames(ByRegion.dat.mean) %in% wadden_species)],
region == "OSPARIId" & years == endYearBreeding$IId)
ByRegion.dat.latest.mean.IIdw <- subset(ByRegion.dat.mean[, c("years", "region", wadden_species)],
region == "OSPARIId" & years == endYearBreeding$IIdw)
ByRegion.dat.latest.mean.IIe <- subset(ByRegion.dat.mean,
region == "OSPARIIe" & years == endYearBreeding$IIe)
ByRegion.dat.latest.mean.IIf <- subset(ByRegion.dat.mean,
region == "OSPARIIf" & years == endYearBreeding$IIf)
ByRegion.dat.latest.mean.III <- subset(ByRegion.dat.mean,
region == "OSPARIII" & years == endYearBreeding$III)
ByRegion.dat.latest.mean.IV <- subset(ByRegion.dat.mean,
region == "OSPARIV" & years == endYearBreeding$IV)
# ByRegion.dat.latest.mean.V <- subset(ByRegion.dat.mean,
# region == "OSPARV" & years == endYearBreeding$V)
ByRegion.dat.latest.mean <- bind_rows(ByRegion.dat.latest.mean.I,
ByRegion.dat.latest.mean.Im,
ByRegion.dat.latest.mean.In,
ByRegion.dat.latest.mean.Io,
ByRegion.dat.latest.mean.II,
ByRegion.dat.latest.mean.IIw,
ByRegion.dat.latest.mean.IIa,
ByRegion.dat.latest.mean.IIb,
ByRegion.dat.latest.mean.IIc,
ByRegion.dat.latest.mean.IId,
ByRegion.dat.latest.mean.IIdw,
ByRegion.dat.latest.mean.IIe,
ByRegion.dat.latest.mean.IIf,
ByRegion.dat.latest.mean.III,
ByRegion.dat.latest.mean.IV)
# ByRegion.dat.latest.mean.V)
ByRegion.dat.latest.mean <- subset(ByRegion.dat.latest.mean, select = -c(years))
# pivot data frame so species in row and regions+sub_regions in columns
ByRegion.dat.latest.mean <- ByRegion.dat.latest.mean %>%
pivot_longer(names_to = "species", cols = -region) %>%
filter(!is.na(value)) %>%
pivot_wider(names_from = "region", values_from = value)
# add species information and sort by functional group and species name
ByRegion.dat.latest.mean <- merge(ByRegion.dat.latest.mean, add.info, by.x = "species", by.y = "Common_name", all.x = TRUE)
ByRegion.dat.latest.mean <- ByRegion.dat.latest.mean[order(ByRegion.dat.latest.mean$Fun_Group, ByRegion.dat.latest.mean$Species),]
Export species+region/subregion latest six year relative abundance means
write.csv(ByRegion.dat.latest.mean,
path("results", "EMECO checks", "breeding_out", "Breeding Seabirds Indicator Latest Six Year Mean.csv"), row.names = FALSE)
From ByRegion.dat.mean get the previous six year relative abundance mean for each species as rows and OSPAR region / sub-region as columns including species number of eggs and functional group. Sorted by functional group and species scientific name.
# select the intermediate year for each region
ByRegion.dat.previous.mean.I <- subset(ByRegion.dat.mean,
region == "OSPARI" & years == previousYearBreeding)
ByRegion.dat.previous.mean.Im <- subset(ByRegion.dat.mean,
region == "OSPARIm" & years == previousYearBreeding)
ByRegion.dat.previous.mean.In <- subset(ByRegion.dat.mean,
region == "OSPARIn" & years == previousYearBreeding)
ByRegion.dat.previous.mean.Io <- subset(ByRegion.dat.mean,
region == "OSPARIo" & years == previousYearBreeding)
ByRegion.dat.previous.mean.II <- subset(ByRegion.dat.mean[!(colnames(ByRegion.dat.mean) %in% wadden_species)],
region == "OSPARII" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIw <- subset(ByRegion.dat.mean[, c("years", "region", wadden_species)],
region == "OSPARII" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIa <- subset(ByRegion.dat.mean,
region == "OSPARIIa" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIb <- subset(ByRegion.dat.mean,
region == "OSPARIIb" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIc <- subset(ByRegion.dat.mean,
region == "OSPARIIc" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IId <- subset(ByRegion.dat.mean[!(colnames(ByRegion.dat.mean) %in% wadden_species)],
region == "OSPARIId" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIdw <- subset(ByRegion.dat.mean[, c("years", "region", wadden_species)],
region == "OSPARIId" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIe <- subset(ByRegion.dat.mean,
region == "OSPARIIe" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IIf <- subset(ByRegion.dat.mean,
region == "OSPARIIf" & years == previousYearBreeding)
ByRegion.dat.previous.mean.III <- subset(ByRegion.dat.mean,
region == "OSPARIII" & years == previousYearBreeding)
ByRegion.dat.previous.mean.IV <- subset(ByRegion.dat.mean,
region == "OSPARIV" & years == previousYearBreeding)
# ByRegion.dat.previous.mean.V <- subset(ByRegion.dat.mean,
# region == "OSPARV" & years == previousYearBreeding)
ByRegion.dat.previous.mean <- bind_rows(ByRegion.dat.previous.mean.I,
ByRegion.dat.previous.mean.Im,
ByRegion.dat.previous.mean.In,
ByRegion.dat.previous.mean.Io,
ByRegion.dat.previous.mean.II,
ByRegion.dat.previous.mean.IIw,
ByRegion.dat.previous.mean.IIa,
ByRegion.dat.previous.mean.IIb,
ByRegion.dat.previous.mean.IIc,
ByRegion.dat.previous.mean.IId,
ByRegion.dat.previous.mean.IIdw,
ByRegion.dat.previous.mean.IIe,
ByRegion.dat.previous.mean.IIf,
ByRegion.dat.previous.mean.III,
ByRegion.dat.previous.mean.IV)
# ByRegion.dat.previous.mean.V)
ByRegion.dat.previous.mean <- subset(ByRegion.dat.previous.mean, select = -c(years))
# pivot data frame so species in row and regions+sub_regions in columns
ByRegion.dat.previous.mean <- ByRegion.dat.previous.mean %>%
pivot_longer(names_to = "species", cols = -region) %>%
filter(!is.na(value)) %>%
pivot_wider(names_from = "region", values_from = value)
# OSPARV missing as no species in previous assessment year
if(!any(names(ByRegion.dat.previous.mean) == "OSPARV")) {
ByRegion.dat.previous.mean <- ByRegion.dat.previous.mean %>%
mutate(OSPARV = NA_integer_, .after = everything())
}
# add species information and sort by functional group and species name
ByRegion.dat.previous.mean <- merge(ByRegion.dat.previous.mean, add.info, by.x = "species", by.y = "Common_name", all.x = TRUE)
ByRegion.dat.previous.mean <- ByRegion.dat.previous.mean[order(ByRegion.dat.previous.mean$Fun_Group, ByRegion.dat.previous.mean$Species),]
Export species+region/subregion previous six year relative abundance means
write.csv(ByRegion.dat.previous.mean,
path("results", "EMECO checks", "breeding_out",
str_glue("Breeding Seabirds Indicator {previousYearBreeding} Six Year Mean.csv")), row.names = FALSE)
Create latest breeding assessment traffic light plots, with colour indicating if latest six year mean is above or below the relative abundance threshold for the species and arrows indicating if there has been a change in threshold status between the latest year and previous year (= 2014) assessments