b1_indicator_export_data.Rmd
library(b1indicator)
library(dplyr)
library(tidyr)
library(stringr)
library(pins)
library(fs)
library(readr)
This script splits the country abundance data into two groups: countries that have supplied modelled data, which can skip the imputation step, and countries whose data require the imputation step. The files are exported for the imputation step and subsequent analysis.
# Create the output folder and its per-season sub-folders in a single
# vectorised call; directories that already exist are left untouched.
dir_create(c(
  "data",
  path("data", "breeding"),
  path("data", "wintering")
))
Select either the regional or subregional ICES abundance data
# Abundance data used by every export below. To run against the example data
# instead, uncomment the next line and comment out the pin retrieval.
# b1_abundance_data_ices <- example_abundance_data
b1_abundance_data_ices <- pin_get(name = "b1_abundance_data_ices_analysis")
# Pseudo-sites for each OSPAR region / subregion, appended to the site list
# exported below. SiteID and SUBADMIN both hold the region code, and Colony is
# that code prefixed with "OSPAR". Regions III-V have no subregion (NA).
ospar_subregions <- tibble::tibble(
  SiteID = c(
    "Im", "In", "Io",
    "IIa", "IIb", "IIc", "IId", "IIe", "IIf",
    "III", "IV", "V"
  ),
  SUBADMIN = SiteID,
  Colony = paste0("OSPAR", SiteID),
  OSPAR_REGION = c(rep(1L, 3), rep(2L, 6), 3L, 4L, 5L),
  OSPAR_SUBREGION = c(
    "m", "n", "o",
    "a", "b", "c", "d", "e", "f",
    NA, NA, NA
  )
)
# Export the site lookup table: every distinct colony found in the analysed
# and the excluded abundance pins, followed by the OSPAR (sub)region
# pseudo-sites defined in `ospar_subregions`.
list(
  pin_get("b1_abundance_data_ices_analysis"),
  pin_get("b1_abundance_data_ices_excluded")
) %>%
  bind_rows() %>%
  transmute(
    SiteID = as.character(ices_colony_id),
    SUBADMIN = country,
    # "/" in colony names is replaced with "-"
    Colony = str_replace_all(colony_name, "/", "-"),
    OSPAR_REGION = area_reference,
    OSPAR_SUBREGION = birds_subdivision
  ) %>%
  distinct() %>%
  bind_rows(ospar_subregions) %>%
  write_csv(path("data", "BreedingSites ABUNDANCE.csv"))
Regional Weightings that have zero counts for the source_year_weighting have been excluded.
# Export the regional weightings with the column headers used by the
# downstream files; the colony label column is not exported.
regional_weightings %>%
  select(-colony_ices_label) %>%
  dplyr::rename(
    Country = country,
    AphiaID = aphia_id,
    OSPAR_region = ospar_region,
    OSPAR_subregion = ospar_subregion,
    Weighting_value = weighting_value,
    Source_year_weighting = source_year_weighting,
    Count_unit = count_unit
  ) %>%
  write_csv(path("data", "RegionalWeightings.csv"))
All countries apart from the Netherlands in OSPAR IId and Spain in OSPAR IV.
# Breeding counts that need the imputation step: every country except
# Spain (ES) and the Netherlands (NL).
ices_breeding_imputation <- b1_abundance_data_ices %>%
  filter(
    str_to_lower(count_flag) == "breeding_data",
    !(country %in% c("ES", "NL"))
  ) %>%
  # colony key is "<ices_colony_id>~<colony_name>", with "/" replaced by "-"
  unite("colony", ices_colony_id, colony_name, sep = "~") %>%
  mutate(colony = str_replace_all(colony, "/", "-")) %>%
  select(
    Country = country,
    Colony = colony,
    CommonName = common_name,
    Species = scientific_name,
    Year = year,
    Count = count,
    Sample = sample_breeding,
    Plot = plot_combination,
    OSPAR = area_reference,
    OSPARsubRegion = birds_subdivision
  )
# sum whole colony counts where there is more than one count for each species
# and colony per year [requested by Ian Mitchell, 2016-17]
# `.groups = "drop"` returns an ungrouped table, so the combined data written
# below carries no leftover grouping and no regrouping message is emitted.
ices_breeding_imputation %>%
  filter(Sample == 2) %>%
  group_by(across(-Count)) %>%
  dplyr::summarise(Count = sum(Count), .groups = "drop") %>%
  # plot counts are not summed as they can be multiple counts which is handled
  # in the imputation step
  bind_rows(
    ices_breeding_imputation %>%
      filter(Sample == 1)
  ) %>%
  # summarise() moves Count to the last column; restore its original position
  relocate(Count, .after = "Year") %>%
  write_tsv(path("data", "breeding", "ForImputationBreeding.txt"))
# Breeding counts from Spain (ES) and the Netherlands (NL), which bypass the
# imputation step. Only the six columns below are exported.
ices_breeding_skip_imputation <- b1_abundance_data_ices %>%
  filter(
    str_to_lower(count_flag) == "breeding_data",
    country %in% c("ES", "NL")
  ) %>%
  select(
    SiteID = ices_colony_id,
    colony_name,
    year,
    count,
    count_type = count_method,
    CommonName = common_name
  ) %>%
  write_csv(path("data", "breeding", "SkipImputationBreeding.csv"))
All countries apart from the Netherlands in OSPAR IId and Great Britain in OSPAR IIa, IId, IIe, IIf, III.
# Non-breeding (wintering) counts that need the imputation step: every
# country except the Netherlands (NL) and Great Britain (GB).
ices_wintering_imputation <- b1_abundance_data_ices %>%
  filter(
    str_to_lower(count_flag) == "non_breeding",
    !(country %in% c("NL", "GB"))
  ) %>%
  # colony key is "<ices_colony_id>~<colony_name>", with "/" replaced by "-"
  unite("colony", ices_colony_id, colony_name, sep = "~") %>%
  mutate(colony = str_replace_all(colony, "/", "-")) %>%
  select(
    Country = country,
    Colony = colony,
    CommonName = common_name,
    Species = scientific_name,
    Year = year,
    Count = count,
    Sample = sample_breeding,
    Plot = plot_combination,
    OSPAR = area_reference,
    OSPARsubRegion = birds_subdivision
  )
# sum the counts where there is more than one count per species within a year
# for a colony. This relates to counts for subspecies
# `.groups = "drop"` returns an ungrouped table, so the data written below
# carries no leftover grouping and no regrouping message is emitted.
ices_wintering_imputation %>%
  group_by(across(-Count)) %>%
  dplyr::summarise(Count = sum(Count), .groups = "drop") %>%
  # summarise() moves Count to the last column; restore its original position
  relocate(Count, .after = "Year") %>%
  write_tsv(path("data", "wintering", "ForImputationWintering.txt"))
The Netherlands in OSPAR IId and Great Britain in OSPAR IIa, IId, IIe, IIf, III.
# Non-breeding (wintering) counts from the Netherlands (NL) and Great Britain
# (GB), which bypass the imputation step.
ices_wintering_skip_imputation <- b1_abundance_data_ices %>%
  filter(
    str_to_lower(count_flag) == "non_breeding",
    country %in% c("NL", "GB")
  ) %>%
  # every column kept here other than `count` takes part in the grouping below
  select(
    ices_colony_id,
    colony_name,
    year,
    count,
    count_method,
    common_name,
    count_flag,
    country,
    area_reference,
    birds_subdivision
  ) %>%
  # sum the counts where there is more than one count per species within a
  # year for a colony. This relates to counts for subspecies
  group_by(across(-count)) %>%
  dplyr::summarise(count = sum(count)) %>%
  ungroup() %>%
  # export: keep and rename only the columns written to the CSV
  select(
    SiteID = ices_colony_id,
    colony_name,
    year,
    count,
    count_type = count_method,
    CommonName = common_name
  ) %>%
  write_csv(path("data", "wintering", "SkipImputationWintering.csv"))