Script prepared to collate the weighting value submitted by OSPAR countries to ICES. The script will collate the information by country and will select the most recent weighting value per species / country /region / sub region.

The script has been tailored on the last data set retrival (November 2021) and it may be re-arranged for future collating.

# set config
Sys.setenv(R_CONFIG_ACTIVE = "production")

setwd(config::get("collate_regional_weighting_data_wd"))
data_final<-read.csv("Regional_weightingsAndBaseLine.csv")
library(tidyverse)
##save.image("Weighting_factor.Rdata")
load("Weighting_factor.Rdata")
##names(data_final)
colnames(data_final)[1]<-"Country"
unique(data_final$Notes)
data_final<-data_final[,-11]
##We drop the Notes field that is NULL

data_final$Country_v2<-data_final$Country

##Renaming the conutry properly according with B1 abundance data
data_final$Country_v2[data_final$Country=="GG"]<-"GB"
data_final$Country_v2[data_final$Country=="IM"]<-"GB"
data_final$Country_v2[data_final$Country=="JE"]<-"GB"
unique(data_final$Country_v2)
##Importing Sweden weighting file
data_final<-data_final[data_final$Country!="SE",]
data_SE<-read.csv("weighting_Sweden_2021.csv")
names(data_final)
names(data_SE)
unique(data_SE$Country)
data_SE$Country_v2<-data_SE$Country
colnames(data_SE)<-colnames(data_final)
data_final<-rbind(data_final,data_SE)
unique(data_final$Country_v2)
##Importing Norway weighting file
data_NO<-read.csv("BaselinesAndWeightings_Norway_2016.csv")
colnames(data_NO)[1]<-"Country"
names(data_final)
names(data_NO)
unique(data_NO$Country)
data_NO$Country_v2<-data_NO$Country
colnames(data_NO)<-colnames(data_final)
data_final<-rbind(data_final,data_NO)
unique(data_final$Country_v2)
##Importing Belgium weighting file
data_BE<-read.csv("BaselinesAndWeightings_BE_2016.csv")
colnames(data_BE)[1]<-"Country"
names(data_final)
names(data_BE)
unique(data_BE$Country)
data_BE$Country_v2<-data_BE$Country
colnames(data_BE)<-colnames(data_final)
data_final<-rbind(data_final,data_BE)
unique(data_final$Birds_subdivision)
unique(data_final$Country_v2)

data_final$Birds_subdivision[data_final$Birds_subdivision=="c"]<-"2c"
data_final$Birds_subdivision[data_final$Birds_subdivision=="d"]<-"2d"

unique(data_final$Birds_subdivision)
unique(data_final$Country)
##Importing GB/IE weighting file

data_UKIE<-read.csv("BaselinesAndWeightings_UK_IE_CI_IoM_202_USE.csv")
colnames(data_UKIE)[1]<-"Country"
unique(data_UKIE$Country)
data_UKIE$Country_v2[data_UKIE$Country=="Isle of Man"]<-"GB"
data_UKIE$Country_v2[data_UKIE$Country=="Channel Islands"]<-"GB"
data_UKIE$Country_v2[data_UKIE$Country=="United Kingdom"]<-"GB"
data_UKIE$Country_v2[data_UKIE$Country=="Republic of Ireland"]<-"IE"

unique(data_UKIE$Country_v2)

names(data_UKIE)
names(data_final)
data_UKIE<-data_UKIE[,-11]
colnames(data_UKIE)<-colnames(data_final)


unique(data_UKIE$Birds_subdivision)

colnames(data_UKIE)<-colnames(data_final)
data_final<-rbind(data_final,data_UKIE)
###check on data final
unique(data_final$Birds_subdivision)
unique(data_final$Country_v2)
###check the group 1n&m ans split per correct subdivision
check<-data_final[data_final$Birds_subdivision=="1m&n",]
check$Birds_subdivision<-"1m"

data_final$Birds_subdivision[data_final$Birds_subdivision=="1m&n"]<-"1n"

data_final<-rbind(data_final,check)

unique(data_final$Country_v2)

#####Splitting the data weights values for REgiona and SubRegional

data_final_Subregion<-data_final
data_final_Region<-data_final

####Creating the Subregional data_final

data_final_Subregion<-data_final_Subregion[data_final_Subregion$Birds_subdivision!="2",]
data_final_Subregion<-data_final_Subregion[data_final_Subregion$Birds_subdivision!="1",]

unique(data_final_Subregion$Birds_subdivision)
##Summarize the count for GB and channel island: we will add the count for different island
GB_Subregion<-data_final_Subregion[data_final_Subregion$Country_v2=="GB",]
unique(GB_Subregion$Country)

##Guernsey
quale<-(GB_Subregion$Country=="GG" & GB_Subregion$Weighting_value==0)
GB_Subregion<-GB_Subregion[-which(quale),]
##Jersey
quale<-(GB_Subregion$Country=="JE" & GB_Subregion$Weighting_value==0)
GB_Subregion<-GB_Subregion[-which(quale),]
##IM
quale<-(GB_Subregion$Country=="IM" & GB_Subregion$Weighting_value==0)
GB_Subregion<-GB_Subregion[-which(quale),]
##Channel Islands
quale<-(GB_Subregion$Country=="Channel Islands" & GB_Subregion$Weighting_value==0)
GB_Subregion<-GB_Subregion[-which(quale),]
##Isle of Man
quale<-(GB_Subregion$Country=="Isle of Man" & GB_Subregion$Weighting_value==0)
GB_Subregion<-GB_Subregion[-which(quale),]
GB_Subregion$Country[GB_Subregion$Country=="United Kingdom"]<-"GB"

###Sum the weighting values for each bird/subdivision
check_duplicated_GB<-GB_Subregion[duplicated(paste(GB_Subregion$Country,GB_Subregion$Species_name,GB_Subregion$Birds_subdivision,GB_Subregion$Source_year_weighting,GB_Subregion$count_flag)),]

####remove duplication
GB_Subregion_no_duplication<-GB_Subregion%>%distinct(GB_Subregion$Country,GB_Subregion$Species_name,GB_Subregion$Birds_subdivision,GB_Subregion$Source_year_weighting, .keep_all = TRUE)
GB_Subregion_ND<-GB_Subregion_no_duplication
##selecting the  last year of observation

##replacing NULL value
GB_Subregion_ND$Source_year_weighting[GB_Subregion_ND$Source_year_weighting=="NULL"]<-2000
GB_Subregion_ND$Source_year_weighting<-as.numeric(GB_Subregion_ND$Source_year_weighting)


GB_Subregion_ND$combo <- paste(GB_Subregion_ND$count_flag,GB_Subregion_ND$Species_name,GB_Subregion_ND$Birds_subdivision,GB_Subregion_ND$Country, sep="_")
GB_Subregion_ND$combo <- as.factor(GB_Subregion_ND$combo)

levels(GB_Subregion_ND$combo) <- as.character(1:length(levels(GB_Subregion_ND$combo)))

nplace <- length(GB_Subregion_ND$combo)

GB_Subregion_ND_Year<-data.frame()
b<-data.frame()
a<-data.frame()

i=1

for(i in 1:nplace){

if(nrow(unique(GB_Subregion_ND[GB_Subregion_ND$combo == as.character(i),]))>1)
{
  b<-data.frame()
  a=rbind(a, unique(GB_Subregion_ND[GB_Subregion_ND$combo == as.character(i),]))
  b<- unique(GB_Subregion_ND[GB_Subregion_ND$combo == as.character(i),])
print(i)
GB_Subregion_ND_Year <- rbind(GB_Subregion_ND_Year, b[(b$Source_year_weighting==max(b$Source_year_weighting)),])}
else {
  GB_Subregion_ND_Year <- rbind(GB_Subregion_ND_Year, GB_Subregion_ND[GB_Subregion_ND$combo ==as.character(i),])
  }
}

##aggregating weighting number
GB_Subregion_ND_Year$combo2 <- paste(GB_Subregion_ND_Year$Species_name,GB_Subregion_ND_Year$Birds_subdivision, GB_Subregion_ND_Year$count_flag, sep="_")
GB_Subregion_ND_Year$combo2 <- as.factor(GB_Subregion_ND_Year$combo2)
unique(GB_Subregion_ND_Year$combo2)
levels(GB_Subregion_ND_Year$combo2) <- as.character(1:length(levels(GB_Subregion_ND_Year$combo2)))
#GB_Subregion_ND_Year

#lallo$combo2<-droplevels(lallo$combo2)
#GB_Subregion_ND_Year<-lallo
#GB_Subregion_ND_Year<-dplyr::filter(GB_Subregion_ND_Year, Species_name=="Alca torda" | Species_name=="Rissa tridactyla")
nplace <- length(levels(GB_Subregion_ND_Year$combo2))

GB_Subregion_ND_Year_correct<-data.frame()
b<-data.frame()
a<-data.frame()
somma=0

for(i in 1:nplace){
  if(nrow(GB_Subregion_ND_Year[GB_Subregion_ND_Year$combo2 == as.character(i),])>1)
  {
    b<-data.frame()
    a=rbind(a, unique(GB_Subregion_ND_Year[GB_Subregion_ND_Year$combo2 == as.character(i),]))
    b<- GB_Subregion_ND_Year[GB_Subregion_ND_Year$combo2 == as.character(i),]
    print(i)

    somma=sum(b$Weighting_value)

    b<-b[(b$Source_year_weighting==max(b$Source_year_weighting)),]
    b$Weighting_value<-somma
    GB_Subregion_ND_Year_correct <- rbind(GB_Subregion_ND_Year_correct, b)
    }
  else {
    GB_Subregion_ND_Year_correct <- rbind(GB_Subregion_ND_Year_correct, GB_Subregion_ND_Year[GB_Subregion_ND_Year$combo2 ==as.character(i),])
  }
}


GB_Subregion_ND_Year_correct_no_duplication<-GB_Subregion_ND_Year_correct%>%distinct(GB_Subregion_ND_Year_correct$Species_name,GB_Subregion_ND_Year_correct$Birds_subdivision,GB_Subregion_ND_Year_correct$Source_year_weighting, .keep_all = TRUE)

###generating the correct file for GB
GB_subdivision_ok<-GB_Subregion_ND_Year_correct_no_duplication

GB_subdivision_dereplicated<-GB_subdivision_ok[,-c(12:22)]
#####Check and de-replication for the other country.
All_Subregion<-data_final_Subregion[data_final_Subregion$Country_v2!="GB",]
unique(All_Subregion$Country_v2)

check_duplicated_All<-All_Subregion[duplicated(paste(All_Subregion$Country_v2,All_Subregion$Species_name,All_Subregion$Birds_subdivision,All_Subregion$Source_year_weighting,All_Subregion$count_flag)),]

####remove duplication
All_Subregion_no_duplication<-All_Subregion%>%distinct(All_Subregion$Country,All_Subregion$Species_name,All_Subregion$Birds_subdivision,All_Subregion$Source_year_weighting, .keep_all = TRUE)
All_Subregion_ND<-All_Subregion_no_duplication

##selecting the  last year of observation

##checking the correct assignement of the year
unique(All_Subregion_ND$Source_year_weighting)
All_Subregion_ND$Source_year_weighting[All_Subregion_ND$Source_year_weighting=="2006-2014"]<-"2014"
All_Subregion_ND$Source_year_weighting[All_Subregion_ND$Source_year_weighting=="1979-1984"]<-"1984"
All_Subregion_ND$Source_year_weighting[All_Subregion_ND$Source_year_weighting=="NULL"]<-All_Subregion_ND$source_year_baseline[All_Subregion_ND$Source_year_weighting=="NULL"]
All_Subregion_ND$Source_year_weighting[All_Subregion_ND$Source_year_weighting=="NULL"]<-"2020"

All_Subregion_ND$combo <- paste(All_Subregion_ND$count_flag,All_Subregion_ND$Species_name,All_Subregion_ND$Birds_subdivision,All_Subregion_ND$Country_v2, sep="_")
All_Subregion_ND$combo <- as.factor(All_Subregion_ND$combo)

levels(All_Subregion_ND$combo) <- as.character(1:length(levels(All_Subregion_ND$combo)))

nplace <- length(All_Subregion_ND$combo)

All_Subregion_ND_Year<-data.frame()
b<-data.frame()
a<-data.frame()

i=1

for(i in 1:nplace){

  if(nrow(unique(All_Subregion_ND[All_Subregion_ND$combo == as.character(i),]))>1)
  {
    b<-data.frame()
    a=rbind(a, unique(All_Subregion_ND[All_Subregion_ND$combo == as.character(i),]))
    b<- unique(All_Subregion_ND[All_Subregion_ND$combo == as.character(i),])
    print(i)
    All_Subregion_ND_Year <- rbind(All_Subregion_ND_Year, b[(b$Source_year_weighting==max(b$Source_year_weighting)),])}
  else {
    All_Subregion_ND_Year <- rbind(All_Subregion_ND_Year, All_Subregion_ND[All_Subregion_ND$combo ==as.character(i),])
  }
}

All_Subregion_ND_Year_no_duplication<-All_Subregion_ND_Year%>%distinct(All_Subregion_ND_Year$count_flag,All_Subregion_ND_Year$Country_v2,All_Subregion_ND_Year$Species_name,All_Subregion_ND_Year$Birds_subdivision,All_Subregion_ND_Year$Source_year_weighting, .keep_all = TRUE)
###generating the correct file for the non GB country
All_subdivision_ok<-All_Subregion_ND_Year_no_duplication

All_subdivision_dereplicated<-All_subdivision_ok[,-c(12:21)]

Subregion_derep_weighting<-rbind(All_subdivision_dereplicated,GB_subdivision_dereplicated)
write.table(Subregion_derep_weighting,"SubRegion_weightings_value_collated_2.csv",sep=",",row.names=FALSE)

save.image("Weighting_factor.Rdata")