b1_indicator_regional_weighting_breeding.Rmd
library(b1indicator)
library(plyr)
library(dplyr)
library(purrr)
library(tidyr)
library(stringr)
library(readr)
library(fs)
library(assertr)
library(forcats)
dir_create(path("results"))
#' exportCounts
#'
#'Exports counts for each species in each OSPAR region
#' @param speciesRegion dataframe of species and OSPAR region
#' @param counts dataframe of all counts
#' @return exports counts for species and OSPAR region as csv file
exportCounts <- function (speciesRegion, counts){
region <- speciesRegion$OSPAR_REGION
speciesName <- speciesRegion$CommonName
message(str_glue("Exporting counts for {speciesName} in OSPAR Region {region}"))
# Filter counts
data <- counts %>%
filter(CommonName == speciesName,
OSPAR_REGION == region) %>%
select(SiteID, colony_name, year, count, count_type)
# Export counts
write_csv(data, file = path("data", "breeding",
str_glue('{str_replace(speciesName, " ", "_")}_OSPAR{region}.csv')))
}
Import and collate the imputated counts
# import imputated files
breedingCountsPath <- list.files(path("data", "breeding"),
recursive = FALSE, pattern = "csv", full.names = TRUE)
breedingCountsPath <- breedingCountsPath %>%
discard(str_detect(., "SkipImputationBreeding.csv"))
breedingCounts <- adply(.data = breedingCountsPath, .margins = 1,
.fun = function(x) {
data <- read_csv(file = x, col_names= TRUE)
}) %>%
mutate(Colony = str_remove(Colony, "^[:digit:]+~")) %>%
dplyr::rename(SiteID = ColonyNumber,
colony_name = Colony,
year = Year,
count = gamCount,
count_type = Imputation) %>%
select(SiteID, colony_name, year, count, count_type, CommonName)
# import skipped files
breedingCountsSkip <- read_csv(path("data", "breeding", "SkipImputationBreeding.csv"), col_names= TRUE)
# add files together
breedingCounts <- bind_rows(breedingCounts, breedingCountsSkip)
sites <- read_csv(path("data", "BreedingSites ABUNDANCE.csv"), col_names = TRUE)
weighting <- read_csv(path("data", "RegionalWeightings.csv"), col_names = TRUE)
species <- read_csv(path("data", "number of eggs.csv"), col_names = TRUE)
breedingCounts$SiteID<-as.character(breedingCounts$SiteID)
sites$SiteID<-as.character(sites$SiteID)
breedingCounts <- breedingCounts %>%
left_join(sites, by = "SiteID") %>%
left_join(species, by = c("CommonName" = "Common_name")) %>%
dplyr::rename(Country = SUBADMIN) %>%
mutate(count = as.numeric(count))
weighting <- weighting %>%
filter(count_flag == "breeding_data") %>%
select(-Count_unit, -count_flag)
# Validation: all breeding counts should match to a site ID
breedingCounts %>%
assert(not_na, Country:OSPAR_REGION,
success_fun = success_logical, error_fun = error_stop)
# Validation: all breeding counts should match to a common name
breedingCounts %>%
assert(not_na, SpeciesID:Fun_Group,
success_fun = success_logical, error_fun = error_stop)
Weighting proportions for each species in each counties’ regions and subregions are calculated by dividing the count for the weighted source year by the weighted value for that source year
Modified by MF 18/11/2021 to avoid NULL values. I added the ifelse condition. When count is 0 the proportion is 1 and you retain the count number
dir.create(path("results", "EMECO checks"))
weightingProp <- breedingCounts %>%
group_by (Country, OSPAR_REGION, OSPAR_SUBREGION, SpeciesID, year) %>%
dplyr::summarise(counts = sum(count)) %>%
inner_join(weighting, by = c("Country", "OSPAR_REGION" = "OSPAR_region", "OSPAR_SUBREGION" = "OSPAR_subregion",
"SpeciesID" = "AphiaID", "year" = "Source_year_weighting")) %>%
mutate(proportion = if_else(((counts / Weighting_value) == 0 | (counts / Weighting_value) >1), 1, counts / Weighting_value))
write_csv(weightingProp, file = path("results", "EMECO checks", "RegionalWeightingsForBreedingCounts.csv"))
Species colony counts are weighted for each year by dividing the count by the species weighted proportions for that OSPAR region / sub-region
weightingProp <- weightingProp %>%
select(-counts, -Weighting_value, -year, -species_name)
breedingCounts <- breedingCounts %>%
left_join(weightingProp, by = c("Country", "OSPAR_REGION", "OSPAR_SUBREGION", "SpeciesID")) %>%
mutate(count = if_else(!is.na(proportion), count / proportion, count)) %>%
mutate(count_type = if_else(!is.na(proportion), str_c(count_type, ",weighted"), count_type)) %>%
select(SiteID, colony_name, year, count, count_type, CommonName, OSPAR_REGION, OSPAR_SUBREGION) %>%
mutate(OSPAR_REGION = case_when(breedingCounts$OSPAR_REGION == 1 ~ "I",
breedingCounts$OSPAR_REGION == 2 ~ "II",
breedingCounts$OSPAR_REGION == 3 ~ "III",
breedingCounts$OSPAR_REGION == 4 ~ "IV",
breedingCounts$OSPAR_REGION == 5 ~ "V"))
Species weighted colony counts are summed for each year for each OSPAR region / sub-region
breedingCounts <- breedingCounts %>%
mutate(SiteID = if_else(!is.na(OSPAR_SUBREGION), str_c(OSPAR_REGION, OSPAR_SUBREGION),
OSPAR_REGION),
colony_name = str_c("OSPAR", SiteID),
count_type = "aggregated") %>%
group_by(SiteID, colony_name, year, count_type, CommonName, OSPAR_REGION) %>%
dplyr::summarise(count = sum(count)) %>%
ungroup()