# Alternative working directory, currently disabled:
# setwd("F:/PROJET JUMEAUX INED-MNHM/DATABASE/RAW DATA & METADATA/FRA")
# Work from the France input-data folder; the workbook is read below through
# a file name relative to this directory.
setwd(dir = "D:/NextCloud/HMBD_partage/Input data, metadata and country-specific codes/FRA")

# ******************************************************************************
#                                  FRANCE
#
# This file provides the calculations performed on the input data for 
# France, as used in the construction of the Human Multiple Births Database
#
# ******************************************************************************


library(dplyr)
library(tidyr)
library(openxlsx)
library(ggplot2)
library(forecast)



# Load the France records from the "input data" sheet of the workbook.
data <- read.xlsx(
  "FRA_InputData_25.11.2024.xlsx",
  sheet = "input data"
)

# Store Year as numeric, then preview the first rows.
data[["Year"]] <- as.numeric(data[["Year"]])
head(data)

# Sort the records by ascending Year.
data <- arrange(data, Year)



# Keep rows with a non-missing Source, drop the year 1870, and derive the
# twinning and multiple-delivery rates per 1,000 deliveries.
data <- data %>%
  filter(!is.na(Source), Year != 1870) %>%
  mutate(
    Twinning_rate = (Twin_deliveries / Total_deliveries) * 1000,
    Multiple_rate = (Multiple_deliveries / Total_deliveries) * 1000
  ) %>%
  ungroup()




# Check discrepancies ...........................................................
# For every record, compare each reported total with the sum of its parts:
#   check1: Total_deliveries    vs Singletons + Multiple_deliveries
#   check2: Total_children      vs Singletons + Multiple_children
#   check3: Multiple_deliveries vs twin + triplet + quadruplet-plus deliveries
#   check4: Multiple_children   vs twin + triplet + quadruplet-plus children
# In check3/check4 a missing component counts as zero (na.rm = TRUE).
# A row is kept only when at least one check is non-zero; a check that is NA
# never flags a row by itself, because filter() drops rows whose condition
# is NA.
check <- data %>%
  mutate(
    check1 = round(Total_deliveries - Singletons - Multiple_deliveries, 2),
    check2 = round(Total_children - Singletons - Multiple_children, 2),
    # rowSums() is the vectorised equivalent of a row-wise
    # sum(..., na.rm = TRUE), so rowwise() is not needed.
    check3 = round(Multiple_deliveries -
                     rowSums(cbind(Twin_deliveries,
                                   Triplet_deliveries,
                                   Quadruplet_plus_deliveries),
                             na.rm = TRUE)),
    check4 = round(Multiple_children -
                     rowSums(cbind(Twin_children,
                                   Triplet_children,
                                   Quadruplet_plus_children),
                             na.rm = TRUE))
  ) %>%
  as.data.frame() %>%
  filter(check1 != 0 | check2 != 0 | check3 != 0 | check4 != 0)

# Report the outcome before discarding it, so that the check is not silent
# when the script is sourced rather than run line by line.
if (nrow(check) > 0) {
  message("FRA: ", nrow(check), " record(s) with inconsistent totals (Year: ",
          paste(check$Year, collapse = ", "), ")")
}

rm(check)




# Identify outliers.............................................................
# Visual check: twinning rate (default colour) and multiple-delivery rate
# (red) plotted against Year, with the point shape taken from Stillbirths.
ggplot(data, aes(x = Year, shape = as.factor(Stillbirths))) +
  geom_point(aes(y = Twinning_rate)) +
  geom_point(aes(y = Multiple_rate), colour = "red")


# Run tsoutliers() on each rate column; the `index` element of each result is
# matched against the row numbers of `data` below.
# NOTE(review): this treats the rows as one series in row order - confirm
# there is a single row per year.
outliers_tr <- tsoutliers(data[["Twinning_rate"]])
outliers_mr <- tsoutliers(data[["Multiple_rate"]])


# Flag (1/0) every row picked out for either of the two rates.
check <- data %>%
  select(Source, Year, Twinning_rate, Multiple_rate) %>%
  mutate(outlier = as.numeric(
    row_number() %in% union(outliers_tr$index, outliers_mr$index)
  ))

check[check$outlier == 1, ] # rates for the year 1919 are identified as outliers
rm(check, outliers_tr, outliers_mr)


# Save data.....................................................................

# Keep the processed France table under a country-specific name and export
# it to the pooled-database folder as a text file in write.table()'s default
# layout, without the row-name column.
data_FRA <- data

# FALSE is spelled out: `F` is an ordinary variable that can be reassigned,
# whereas FALSE is a reserved word.
write.table(data_FRA,
            file = "D:/NextCloud/HMBD_partage/Pooled database/FRA_ALLDATA.txt",
            row.names = FALSE)




  


