Create directories

dir_create(path("results"))

Export counts function

#' exportCounts
#'
#'Exports counts for each species in each OSPAR region
#' @param speciesRegion dataframe of species and OSPAR region
#' @param counts dataframe of all counts
#' @return exports counts for species and OSPAR region as csv file
exportCounts <- function (speciesRegion, counts){
  
  region <- speciesRegion$OSPAR_REGION
  speciesName <- speciesRegion$CommonName
  
  message(str_glue("Exporting counts for {speciesName} in OSPAR Region {region}"))
  
  # Filter counts
  data <- counts %>%
          filter(CommonName == speciesName,
                 OSPAR_REGION == region) %>%
          select(SiteID, colony_name, year, count, count_type)
  
  # Export counts
  write_csv(data, file = path("data", "breeding", 
                              str_glue('{str_replace(speciesName, " ", "_")}_OSPAR{region}.csv')))
}

Import Data

Import and collate the imputated counts

# import imputated files
breedingCountsPath <- list.files(path("data", "breeding"),
                                 recursive = FALSE, pattern = "csv", full.names = TRUE)
breedingCountsPath <- breedingCountsPath %>% 
  discard(str_detect(., "SkipImputationBreeding.csv"))

breedingCounts <- adply(.data = breedingCountsPath, .margins = 1,
                        .fun = function(x) {
                          data <- read_csv(file = x, col_names= TRUE)
                        }) %>%
  mutate(Colony = str_remove(Colony, "^[:digit:]+~")) %>% 
  dplyr::rename(SiteID = ColonyNumber,
         colony_name = Colony,
         year = Year,
         count = gamCount,
         count_type = Imputation) %>% 
  select(SiteID, colony_name, year, count, count_type, CommonName)

# import skipped files
breedingCountsSkip  <- read_csv(path("data", "breeding", "SkipImputationBreeding.csv"), col_names= TRUE)

# add files together
breedingCounts <- bind_rows(breedingCounts, breedingCountsSkip)

sites <- read_csv(path("data", "BreedingSites ABUNDANCE.csv"), col_names = TRUE)
weighting <- read_csv(path("data", "RegionalWeightings.csv"), col_names = TRUE)
species <- read_csv(path("data", "number of eggs.csv"), col_names = TRUE)

Format Data

breedingCounts$SiteID<-as.character(breedingCounts$SiteID)
sites$SiteID<-as.character(sites$SiteID)
breedingCounts <- breedingCounts %>%
  left_join(sites, by = "SiteID") %>%
  left_join(species, by = c("CommonName" = "Common_name")) %>%
  dplyr::rename(Country = SUBADMIN) %>%
  mutate(count = as.numeric(count))

weighting <- weighting %>%
  filter(count_flag == "breeding_data") %>%
  select(-Count_unit, -count_flag)

Validation

# Validation: all breeding counts should match to a site ID
breedingCounts %>% 
  assert(not_na, Country:OSPAR_REGION,  
         success_fun = success_logical, error_fun = error_stop)

# Validation: all breeding counts should match to a common name
breedingCounts %>% 
  assert(not_na, SpeciesID:Fun_Group,  
         success_fun = success_logical, error_fun = error_stop)

Source Year Colony Counts

Weighting proportions for each species in each counties’ regions and subregions are calculated by dividing the count for the weighted source year by the weighted value for that source year

Modified by MF 18/11/2021 to avoid NULL values. I added the ifelse condition. When count is 0 the proportion is 1 and you retain the count number

dir.create(path("results", "EMECO checks"))

weightingProp <- breedingCounts %>%
  group_by (Country, OSPAR_REGION, OSPAR_SUBREGION, SpeciesID, year) %>%
  dplyr::summarise(counts = sum(count)) %>%
  inner_join(weighting, by = c("Country", "OSPAR_REGION" = "OSPAR_region", "OSPAR_SUBREGION" = "OSPAR_subregion",
                               "SpeciesID" = "AphiaID", "year" = "Source_year_weighting")) %>%
  mutate(proportion = if_else(((counts / Weighting_value) == 0 | (counts / Weighting_value) >1), 1, counts / Weighting_value))
write_csv(weightingProp, file = path("results", "EMECO checks", "RegionalWeightingsForBreedingCounts.csv"))

Apply Weighting

Species colony counts are weighted for each year by dividing the count by the species weighted proportions for that OSPAR region / sub-region

weightingProp <- weightingProp %>%
  select(-counts, -Weighting_value, -year, -species_name)

breedingCounts <- breedingCounts %>%
  left_join(weightingProp, by = c("Country", "OSPAR_REGION", "OSPAR_SUBREGION", "SpeciesID")) %>%
  mutate(count = if_else(!is.na(proportion), count / proportion, count)) %>%
  mutate(count_type = if_else(!is.na(proportion), str_c(count_type, ",weighted"), count_type)) %>%
  select(SiteID, colony_name, year, count, count_type, CommonName, OSPAR_REGION, OSPAR_SUBREGION) %>%
  mutate(OSPAR_REGION = case_when(breedingCounts$OSPAR_REGION == 1 ~ "I",
                                  breedingCounts$OSPAR_REGION == 2 ~ "II",
                                  breedingCounts$OSPAR_REGION == 3 ~ "III",
                                  breedingCounts$OSPAR_REGION == 4 ~ "IV",
                                  breedingCounts$OSPAR_REGION == 5 ~ "V"))

Filter out counts before 1991

breedingCounts <- breedingCounts %>% 
  filter(year >= 1991)

Aggregate counts

Species weighted colony counts are summed for each year for each OSPAR region / sub-region

breedingCounts <- breedingCounts %>%
  mutate(SiteID = if_else(!is.na(OSPAR_SUBREGION), str_c(OSPAR_REGION, OSPAR_SUBREGION),
                          OSPAR_REGION),
         colony_name = str_c("OSPAR", SiteID),
         count_type = "aggregated") %>%
  group_by(SiteID, colony_name, year, count_type, CommonName, OSPAR_REGION) %>%
  dplyr::summarise(count = sum(count)) %>%
  ungroup()

Species Region combination

speciesRegion <- breedingCounts %>%
  distinct(CommonName, OSPAR_REGION)
a_ply(.data = speciesRegion, .margins = 1, .fun = exportCounts, counts = breedingCounts, 
      .progress = "time")