This script splits the countries' abundance data into two sets: countries that have supplied modelled data, which can skip the imputation step, and countries whose data require the imputation step. The files are exported for the next imputation step and subsequent analysis.

Create directories

# Create the output folders in one vectorised call: the top-level "data"
# folder plus its "breeding" and "wintering" subfolders.
dir_create(c("data", path("data", c("breeding", "wintering"))))

Get ICES data

Select either the regional or subregional ICES abundance data

# Abundance data used by the exports below. To use `example_abundance_data`
# instead of the pinned data, swap in the commented line:
# b1_abundance_data_ices <- example_abundance_data
b1_abundance_data_ices <- pin_get(name = "b1_abundance_data_ices_analysis")

Export number of eggs csv

# Keep the five species columns needed downstream, renamed to the headers used
# in the exported file, and write them to "data/number of eggs.csv".
species %>%
  select(
    SpeciesID = aphia_id,
    Species = scientific_name,
    Common_name = common_name,
    Eggs = eggs,
    Fun_Group = functional_group
  ) %>%
  write_csv(path("data", "number of eggs.csv"))

Export BreedingSites ABUNDANCE csv

# Rows for the OSPAR regions and subregions themselves. They are appended to
# the colony list below so the exported site table has one row per colony plus
# one row per region/subregion. Regions III, IV and V have no subregion (NA).
ospar_subregions <- tibble::tribble(
  ~SiteID, ~SUBADMIN, ~Colony,    ~OSPAR_REGION, ~OSPAR_SUBREGION,
  "Im",    "Im",      "OSPARIm",  1L,            "m",
  "In",    "In",      "OSPARIn",  1L,            "n",
  "Io",    "Io",      "OSPARIo",  1L,            "o",
  "IIa",   "IIa",     "OSPARIIa", 2L,            "a",
  "IIb",   "IIb",     "OSPARIIb", 2L,            "b",
  "IIc",   "IIc",     "OSPARIIc", 2L,            "c",
  "IId",   "IId",     "OSPARIId", 2L,            "d",
  "IIe",   "IIe",     "OSPARIIe", 2L,            "e",
  "IIf",   "IIf",     "OSPARIIf", 2L,            "f",
  "III",   "III",     "OSPARIII", 3L,            NA,
  "IV",    "IV",      "OSPARIV",  4L,            NA,
  "V",     "V",       "OSPARV",   5L,            NA
)

# Build the site lookup from the analysis data plus the excluded data.
# The analysis data are already loaded as `b1_abundance_data_ices`, so reuse
# that object instead of fetching the same pin a second time. This also keeps
# the site list in step with whichever data source was selected when
# `b1_abundance_data_ices` was created.
bind_rows(
  b1_abundance_data_ices,
  pin_get("b1_abundance_data_ices_excluded")
) %>%
  mutate(
    # SiteID is character in `ospar_subregions`, so convert before binding
    ices_colony_id = as.character(ices_colony_id),
    # replace "/" in colony names with "-"
    colony_name = str_replace_all(colony_name, "/", "-")
  ) %>%
  select(
    SiteID = ices_colony_id,
    SUBADMIN = country,
    Colony = colony_name,
    OSPAR_REGION = area_reference,
    OSPAR_SUBREGION = birds_subdivision
  ) %>%
  # one row per site
  distinct() %>%
  bind_rows(ospar_subregions) %>%
  write_csv(path("data", "BreedingSites ABUNDANCE.csv"))

Export Regional Weighting

Regional Weightings that have zero counts for the source_year_weighting have been excluded.

# Drop the colony label column, rename the remaining columns to the headers
# used in the exported file and write the table to
# "data/RegionalWeightings.csv".
regional_weightings %>%
  select(-colony_ices_label) %>%
  dplyr::rename(
    Country = country,
    AphiaID = aphia_id,
    OSPAR_region = ospar_region,
    OSPAR_subregion = ospar_subregion,
    Weighting_value = weighting_value,
    Source_year_weighting = source_year_weighting,
    Count_unit = count_unit
  ) %>%
  write_csv(path("data", "RegionalWeightings.csv"))

Export Breeding Seabird Counts for Imputation

All countries apart from the Netherlands in OSPAR IId and Spain in OSPAR IV

# Breeding counts that go through the imputation step: every country except
# ES and NL, with columns renamed to the headers used in the exported file.
ices_breeding_imputation <- b1_abundance_data_ices %>%
  # colony key is "<ices_colony_id>~<colony_name>", with "/" replaced by "-"
  unite("colony", ices_colony_id, colony_name, sep = "~") %>%
  mutate(colony = str_replace_all(colony, "/", "-")) %>%
  filter(str_to_lower(count_flag) == "breeding_data") %>%
  filter(!(country %in% c("ES", "NL"))) %>%
  select(
    Country = country,
    Colony = colony,
    CommonName = common_name,
    Species = scientific_name,
    Year = year,
    Count = count,
    Sample = sample_breeding,
    Plot = plot_combination,
    OSPAR = area_reference,
    OSPARsubRegion = birds_subdivision
  )

# sum whole colony counts where there is more than one count for each species
# and colony per year [requested by Ian Mitchell, 2016-17]
ices_breeding_imputation %>%
  filter(Sample == 2) %>%
  group_by(across(-Count)) %>%
  # `.groups = "drop"` returns an ungrouped tibble, so no leftover grouping is
  # carried into `bind_rows()` / `relocate()` below
  dplyr::summarise(Count = sum(Count), .groups = "drop") %>%
  # plot counts are not summed as they can be multiple counts which is handled in
  # the imputation step
  bind_rows(
    ices_breeding_imputation %>%
      filter(Sample == 1)
  ) %>%
  # `summarise()` puts Count last; move it back to sit after Year
  relocate(Count, .after = "Year") %>%
  write_tsv(path("data", "breeding", "ForImputationBreeding.txt"))

Export Breeding Seabird Counts Skip Imputation

  • Netherlands in OSPAR IId and Spain in OSPAR IV
# Breeding counts that skip the imputation step (ES and NL only).
# The exported columns are selected and renamed in a single `select()`, so the
# block does not depend on an unqualified `rename()`, which other attached
# packages can mask. The exported columns and their order are:
# SiteID, colony_name, year, count, count_type, CommonName.
ices_breeding_skip_imputation <- b1_abundance_data_ices %>%
  filter(str_to_lower(count_flag) == "breeding_data") %>%
  filter(country %in% c("ES", "NL")) %>%
  select(
    SiteID = ices_colony_id,
    colony_name,
    year,
    count,
    count_type = count_method,
    CommonName = common_name
  ) %>%
  write_csv(path("data", "breeding", "SkipImputationBreeding.csv"))

Export Wintering Seabird Counts for Imputation

All countries apart from the Netherlands in OSPAR IId and Great Britain in OSPAR IIa, IId, IIe, IIf, III.

# Non-breeding counts that go through the imputation step: every country
# except NL and GB, with columns renamed to the headers used in the exported
# file.
ices_wintering_imputation <- b1_abundance_data_ices %>%
  # colony key is "<ices_colony_id>~<colony_name>", with "/" replaced by "-"
  unite("colony", ices_colony_id, colony_name, sep = "~") %>%
  mutate(colony = str_replace_all(colony, "/", "-")) %>%
  filter(str_to_lower(count_flag) == "non_breeding") %>%
  filter(!(country %in% c("NL", "GB"))) %>%
  select(
    Country = country,
    Colony = colony,
    CommonName = common_name,
    Species = scientific_name,
    Year = year,
    Count = count,
    Sample = sample_breeding,
    Plot = plot_combination,
    OSPAR = area_reference,
    OSPARsubRegion = birds_subdivision
  )

# sum the counts where there is more than one count per species within a year
# for a colony. This relates to counts for subspecies
ices_wintering_imputation %>%
  group_by(across(-Count)) %>%
  # `.groups = "drop"` returns an ungrouped tibble rather than one that is
  # still grouped by all but the last grouping column
  dplyr::summarise(Count = sum(Count), .groups = "drop") %>%
  # `summarise()` puts Count last; move it back to sit after Year
  relocate(Count, .after = "Year") %>%
  write_tsv(path("data", "wintering", "ForImputationWintering.txt"))

Export Wintering Seabird Counts Skip Imputation

The Netherlands in OSPAR IId and Great Britain in OSPAR IIa, IId, IIe, IIf, III.

# Non-breeding counts that skip the imputation step (NL and GB only), built,
# summed and exported in one pipeline.
ices_wintering_skip_imputation <- b1_abundance_data_ices %>%
  filter(str_to_lower(count_flag) == "non_breeding") %>%
  filter(country %in% c("NL", "GB")) %>%
  select(
    ices_colony_id,
    colony_name,
    year,
    count,
    count_method,
    common_name,
    count_flag,
    country,
    area_reference,
    birds_subdivision
  ) %>%
  # sum the counts where there is more than one count per species within a year
  # for a colony. This relates to counts for subspecies
  group_by(across(-count)) %>%
  dplyr::summarise(count = sum(count), .groups = "drop") %>%
  # export: select and rename in one step, so the block does not depend on an
  # unqualified `rename()`, which other attached packages can mask
  select(
    SiteID = ices_colony_id,
    colony_name,
    year,
    count,
    count_type = count_method,
    CommonName = common_name
  ) %>%
  write_csv(path("data", "wintering", "SkipImputationWintering.csv"))