b1_indicator_regional_weighting_wintering.Rmd
library(b1indicator)
library(plyr)
library(dplyr)
library(stringr)
library(purrr)
library(tidyr)
library(readr)
library(fs)
library(assertr)
# Make sure the output folder exists before anything is written to it
fs::dir_create(fs::path("results"))
Import and collate the imputed counts
# Import the imputed wintering counts.
# Every CSV directly inside data/wintering is read as an imputation output,
# except SkipImputationWintering.csv, which is appended separately below.
# The pattern is anchored and the dot escaped so that only file names ending
# in ".csv" are listed (a bare "csv" would also match e.g. "counts.csv.bak").
winteringCountsPath <- list.files(
  path("data", "wintering"),
  pattern = "\\.csv$",
  recursive = FALSE,
  full.names = TRUE
)

# Drop the skipped-imputation file from the list of imputed files
winteringCountsPath <- winteringCountsPath %>%
  discard(str_detect(., "SkipImputationWintering\\.csv$"))

# Read every imputed file and stack the results into one data frame
winteringCounts <- adply(
  .data = winteringCountsPath,
  .margins = 1,
  .fun = function(x) read_csv(file = x, col_names = TRUE)
) %>%
  mutate(
    # the leading digits of Colony are used as the site ID ...
    SiteID = as.numeric(str_extract(Colony, "^[:digit:]+")),
    # ... and are then stripped, together with the "~" separator
    Colony = str_remove(Colony, "^[:digit:]+~"),
    count_type = Imputation
  ) %>%
  dplyr::rename(
    colony_name = Colony,
    year = Year,
    count = gamCount
  ) %>%
  select(SiteID, colony_name, year, count, count_type, CommonName)

# Import the counts that skipped imputation
winteringCountsSkip <- read_csv(
  path("data", "wintering", "SkipImputationWintering.csv"),
  col_names = TRUE
)

# Combine the imputed and the skipped counts
winteringCounts <- bind_rows(winteringCounts, winteringCountsSkip)
# Reference tables: site metadata and species information
sites <- read_csv(path("data", "BreedingSites ABUNDANCE.csv"), col_names = TRUE)
sites[["SiteID"]] <- as.numeric(sites[["SiteID"]])

species <- read_csv(path("data", "number of eggs.csv"), col_names = TRUE)

# Regional weightings: keep only the non-breeding rows and drop the
# count unit / flag columns
weighting <- read_csv(path("data", "RegionalWeightings.csv"), col_names = TRUE) %>%
  filter(count_flag == "non_breeding") %>%
  select(-Count_unit, -count_flag)

# Attach site and species information to every count
winteringCounts <- winteringCounts %>%
  left_join(sites, by = "SiteID") %>%
  left_join(species, by = c(CommonName = "Common_name")) %>%
  mutate(count = as.numeric(count)) %>%
  dplyr::rename(Country = SUBADMIN)
# Validation: all wintering counts should match to a site ID, i.e. none of the
# columns from Country through OSPAR_REGION may be NA (stops with an error
# otherwise)
assert(
  winteringCounts, not_na, Country:OSPAR_REGION,
  success_fun = success_logical, error_fun = error_stop
)

# Validation: all wintering counts should match to a common name, i.e. none of
# the columns from SpeciesID through Fun_Group may be NA
assert(
  winteringCounts, not_na, SpeciesID:Fun_Group,
  success_fun = success_logical, error_fun = error_stop
)
Weighting proportions for each species in each country's regions and subregions are calculated by dividing the count for the weighted source year by the weighted value for that source year.
Modified by MF 18/11/2021 to avoid NULL values: an if_else condition was added. When the ratio is 0 (i.e. the count is 0) or greater than 1, the proportion is set to 1 and you retain the count number.
# Folder for the intermediate check files. fs::dir_create() is used instead of
# base dir.create() so that re-running the script does not raise a
# "already exists" warning.
dir_create(path("results", "EMECO checks"))

# Weighting proportion per country / OSPAR region / sub-region / species:
# the summed count in the weighting source year divided by the weighting value.
weightingProp <- winteringCounts %>%
  group_by(Country, OSPAR_REGION, OSPAR_SUBREGION, SpeciesID, year) %>%
  # .groups = "drop" returns an ungrouped table and silences the grouping message
  dplyr::summarise(counts = sum(count), .groups = "drop") %>%
  # the inner join keeps only the year that is the weighting source year
  inner_join(
    weighting,
    by = c(
      "Country",
      "OSPAR_REGION" = "OSPAR_region",
      "OSPAR_SUBREGION" = "OSPAR_subregion",
      "SpeciesID" = "AphiaID",
      "year" = "Source_year_weighting"
    )
  ) %>%
  mutate(
    proportion = counts / Weighting_value,
    # a ratio of 0 or above 1 is replaced by 1, which leaves the counts
    # unchanged when they are later divided by the proportion
    proportion = if_else(proportion == 0 | proportion > 1, 1, proportion)
  )

write_csv(
  weightingProp,
  file = path("results", "EMECO checks", "RegionalWeightingsForWinteringCounts.csv")
)
Species colony counts are weighted for each year by dividing the count by the species weighted proportions for that OSPAR region / sub-region
# Keep only the join keys and the proportion
weightingProp <- weightingProp %>%
  select(-counts, -Weighting_value, -year, -species_name)

# The join below must not add rows: more than one proportion for the same key
# would duplicate the matching counts, so stop early with a clear message.
weightingKeys <- c("Country", "OSPAR_REGION", "OSPAR_SUBREGION", "SpeciesID")
if (anyDuplicated(as.data.frame(weightingProp)[weightingKeys]) > 0) {
  stop(
    "weightingProp holds more than one proportion for some ",
    "Country / OSPAR region / sub-region / species combination.",
    call. = FALSE
  )
}

winteringCounts <- winteringCounts %>%
  left_join(weightingProp, by = weightingKeys) %>%
  mutate(
    # counts without a matching proportion are left as they are
    count = if_else(!is.na(proportion), count / proportion, count),
    count_type = if_else(
      !is.na(proportion),
      str_c(count_type, ",weighted"),
      count_type
    )
  ) %>%
  select(
    SiteID, colony_name, year, count, count_type, SpeciesID, CommonName,
    OSPAR_REGION, OSPAR_SUBREGION
  ) %>%
  # Recode the region number as a Roman numeral. The column of the piped data
  # is used (not winteringCounts$OSPAR_REGION, which is the data frame as it
  # was before this pipeline). Values other than 1-5 become NA.
  mutate(
    OSPAR_REGION = case_when(
      OSPAR_REGION == 1 ~ "I",
      OSPAR_REGION == 2 ~ "II",
      OSPAR_REGION == 3 ~ "III",
      OSPAR_REGION == 4 ~ "IV",
      OSPAR_REGION == 5 ~ "V"
    )
  )
Species weighted colony counts are summed for each year for each OSPAR region / sub-region
# Sum the weighted counts per OSPAR region / sub-division, species and year.
# ColonyID is "OSPAR" followed by the region and, when there is one, the
# sub-region.
winteringCounts <- winteringCounts %>%
  mutate(
    ColonyID = if_else(
      is.na(OSPAR_SUBREGION),
      str_c("OSPAR", OSPAR_REGION),
      str_c("OSPAR", OSPAR_REGION, OSPAR_SUBREGION)
    )
  ) %>%
  # the grouping columns are given their output names while grouping
  group_by(
    OSPAR_region = OSPAR_REGION,
    `OSPAR_sub-division` = OSPAR_SUBREGION,
    ColonyID,
    SpeciesID,
    Year = year,
    CommonName
  ) %>%
  dplyr::summarise(Count = sum(count)) %>%
  ungroup()
# Species for which the reduced Wadden year range 1991 - 2016 should be used
# instead of 1991 - 2020
wadden_species <- c(
  "Bar-tailed godwit", "Barnacle goose", "Black-headed gull", "Brent goose",
  "Common gull", "Curlew sandpiper", "Eurasian curlew", "Eurasian spoonbill",
  "European golden plover", "Great black-backed gull", "Great cormorant",
  "Great crested grebe", "Common greenshank", "Grey plover",
  "European herring gull", "Kentish plover", "Northern lapwing",
  "Lesser black-backed gull", "Mallard", "Mute swan", "Eurasian oystercatcher",
  "Pied avocet", "Northern pintail", "Red-breasted merganser", "Red knot",
  "Common redshank", "Common ringed plover", "Ruff", "Sanderling",
  "Common shelduck", "Northern shoveler", "Slavonian grebe", "Smew",
  "Spotted redshank", "Eurasian teal", "Ruddy turnstone", "Eurasian whimbrel",
  "Whooper swan", "Eurasian wigeon"
)

# Remove the 2017 - 2020 counts of those species in OSPAR region II,
# sub-division d.
# filter() drops rows whose condition is NA, so a missing sub-division (or
# region / year) would otherwise remove rows that are not part of the
# exclusion. coalesce(..., FALSE) treats such rows as "not excluded".
winteringCounts <- winteringCounts %>%
  filter(
    !coalesce(
      OSPAR_region == "II" &
        `OSPAR_sub-division` == "d" &
        between(Year, 2017, 2020) &
        CommonName %in% wadden_species,
      FALSE
    )
  )