#scenario-based community composition comparisons
rm(list=ls())
library(vegan)
library(emmeans)
library(ggplot2)
library(ggrepel)
library(gllvm)
library(patchwork)
library(gridExtra)
library(ggpubr)
library(gstat)
library(reshape)
library(tidyverse)
#devtools::install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")
library(pairwiseAdonis)

# Data loading ----
# Work from the data repository folder and bring in the scenario-1 abundance
# table; the .Rdata file is expected to provide `s1_sppabundance`.
setwd("D:/UoE/Writings/Data Chapter 2/LO/revisions/coding/Essex data repository")
load("s1_sppabundance.Rdata")
df_s1_clean <- s1_sppabundance

# Number of samples in every BACI x sampling-status cell
dplyr::summarise(
  group_by(df_s1_clean, BACI, statusatsampling),
  n = n()
)

# Species abundances are taken from columns 6 to 169 of the table
SpecNames <- colnames(df_s1_clean[, 6:169])
SpecNames

# Occurrence frequency: number of samples in which each species has abundance > 0
HowOften <- colSums(df_s1_clean[, SpecNames] > 0)
HowOften
plot(HowOften, type = "h")

#' Rare-species filter: keep species recorded (abundance > 0) in at least
#' `min_sites` samples.
min_sites <- 5
NotTheseSpecies <- SpecNames[HowOften < min_sites]
NotTheseSpecies

#' Drop the rare species. all_of() replaces the superseded one_of(); every name
#' comes from the table's own column names, so strict matching is safe.
df_s1_1 <- dplyr::select(df_s1_clean, -all_of(NotTheseSpecies))

#' Species retained for the analysis
NewSpeciesNames <- SpecNames[!SpecNames %in% NotTheseSpecies]
NewSpeciesNames
length(NewSpeciesNames)  # 100 recorded by the author

# Number of species used in the analysis
Np <- length(NewSpeciesNames)
Np # 100 recorded by the author

#' Percentage of samples in which each retained species is absent (abundance == 0)
zero_counts_s1 <- colSums(df_s1_1[, NewSpeciesNames] == 0)
BenthicZeros <- 100 * zero_counts_s1 / nrow(df_s1_1)

#' One row per species: its name and its zero percentage
df <- data.frame(
  percent_zeros = BenthicZeros,
  species       = names(BenthicZeros)
)

#' Horizontal bar chart of the zero percentages
ggplot(df, aes(x = percent_zeros, y = species)) +
  geom_col(width = 0.2) +
  labs(
    x = "Percentage of Zeros",
    y = "Species"
  ) +
  theme_minimal() +
  xlim(0, 100) +
  theme(axis.text.y = element_text(size = 5))
# Author's note: most species show an extremely high proportion of zeros

# Are the categorical variables balanced?
table(df_s1_1$BACI)              #' author's note: 50 vs 16 (no)
table(df_s1_1$statusatsampling)  #' author's note: 39 vs 27 (yes)

# s1: data preparation
# Response: sample x species abundance matrix of the retained species
SpecBenthic <- as.matrix(df_s1_1[, NewSpeciesNames])
class(SpecBenthic) # matrix array
head(SpecBenthic)

# Covariates; Treatment is the "<status>_<BACI>" label used throughout
CovBenthic <- df_s1_1[, c("BACI", "statusatsampling", "TargetMMS")]
CovBenthic$Treatment <- with(
  CovBenthic,
  paste(statusatsampling, BACI, sep = "_")
)

# s1: GLLVM fits (negative binomial, two latent variables).
# The fitted models are written to and read back from one explicit path, so the
# load step always picks up the file saved here (the working directory is the
# "Essex data repository" sub-folder, where a relative load would look instead).
s1_model_file <- "D:/UoE/Writings/Data Chapter 2/LO/revisions/coding/s1_gllvm.Rdata"

# m0: Treatment as fixed effect + 2 latent variables
m0_s1 <- gllvm(SpecBenthic, CovBenthic,
               family = "negative.binomial",
               num.lv = 2,
               sd.errors = TRUE,
               formula = ~ Treatment,
               #control.start = list(starting.val = "res"),
               #control = list(optimizer = "nlminb", maxit = 5000),
               control.start = list(starting.val = "random"),
               control = list(optimizer = "optim", method = "BFGS", maxit = 20000),
               seed = 1234)

# m1: latent variables only (no covariates).
# NOTE(review): when X is supplied without `formula`, gllvm() uses every column
# of X as a predictor, so passing CovBenthic here would not give a
# covariate-free model. X is therefore omitted; results derived from the
# previous m1_s1 fit should be re-checked.
m1_s1 <- gllvm(SpecBenthic,
               family = "negative.binomial",
               num.lv = 2,
               sd.errors = TRUE,
               #control.start = list(starting.val = "res"),
               #control = list(optimizer = "nlminb", maxit = 5000),
               control.start = list(starting.val = "random"),
               control = list(optimizer = "optim", method = "BFGS", maxit = 20000),
               seed = 1234)
AIC(m0_s1, m1_s1)

save(m0_s1, m1_s1, file = s1_model_file)
load(s1_model_file)

# Diagnostic plots: species mean-variance relationship next to the model
# residual plots, three panels in one row
par(mfrow = c(1, 3))
meanY <- apply(SpecBenthic, MARGIN = 2, FUN = mean)
varY <- apply(SpecBenthic, MARGIN = 2, FUN = var)
plot(log(meanY), varY, log = "y", main = "Species mean-variance relationship")
# Reference curves: var = mean (solid) and var = mean + mean^2 (dashed red)
sorted_meanY <- sort(meanY)
lines(log(sorted_meanY), sorted_meanY)
lines(log(sorted_meanY), sort(meanY + 1 * meanY^2), col = 2, lty = 2)
legend("bottomright",
       lty = 1:2,
       legend = c("var = mean", "var = mean+phi*mu^2"),
       bty = "n",
       col = 1:2)

plot(m0_s1, which = 1:2, var.colors = 1, n.plot = 100)
dev.off()

##s1:bioindicators####
gllvm::coefplot(m0_s1, cex.ylab = 0.5, y.label = T, which.X = 1)

# Coefficient table of m0_s1; the 4th column (p-value) is renamed to `pval`
coefs <- data.frame(summary(m0_s1)$Coef.table)
colnames(coefs)[4] <- c("pval")
coefs_signif <- coefs[coefs$pval < 0.05, ]

# Row names are split at the first ":" into a treatment part and a species part
coef_name_parts_s1 <- str_split_fixed(rownames(coefs_signif), ":", 2)
coefs_signif <- coefs_signif %>%
  mutate(
    Rowname = rownames(.),
    Treatment = gsub("_", " ", gsub("Treatment", "", coef_name_parts_s1[, 1])),
    Species = coef_name_parts_s1[, 2],
    Directionality = ifelse(Estimate > 0, "Positive", "Negative"),
    # Wald-type 95% interval
    CI_lower = Estimate - 1.96 * Std..Error,
    CI_upper = Estimate + 1.96 * Std..Error
  ) %>%
  # significant species for this treatment are treated as bioindicators
  filter(Treatment == "After Impact")

# Coefficient plot of all significant "After Impact" species: point estimate
# with 95% interval, ordered by estimate and coloured by sign.
# Line widths use `linewidth` (the `size` aesthetic for lines is deprecated in
# ggplot2 >= 3.4.0 and `linewidth` is already used elsewhere in this script).
(s1_biospp <- ggplot(coefs_signif,
                     aes(x = Estimate, y = reorder(Species, Estimate),
                         color = Directionality)) +
  geom_point(size = 3) +
  geom_errorbar(aes(xmin = CI_lower, xmax = CI_upper), width = 0.2, linewidth = 1) +
  geom_vline(xintercept = 0, linetype = "dashed", linewidth = 0.5) +
  scale_color_manual(values = c("Positive" = "darkred", "Negative" = "darkblue")) +
  theme_bw(base_size = 14) +
  theme(strip.text = element_text(size = 14),
        legend.position = "none",
        axis.title = element_blank())
)

# Three strongest positive and three strongest negative indicator species
coefs_top_positive <- coefs_signif %>%
  filter(Directionality == "Positive") %>%
  slice_max(order_by = Estimate, n = 3, with_ties = FALSE)

coefs_top_negative <- coefs_signif %>%
  filter(Directionality == "Negative") %>%
  slice_min(order_by = Estimate, n = 3, with_ties = FALSE)

coefs_top <- rbind(coefs_top_positive, coefs_top_negative)

# Same coefficient plot restricted to the top species; long species names are
# wrapped at 10 characters. `linewidth` replaces the deprecated `size` for lines.
(s1_biospp_top <- ggplot(coefs_top,
                         aes(x = Estimate, y = reorder(Species, Estimate),
                             color = Directionality)) +
  geom_point(size = 3) +
  geom_errorbar(aes(xmin = CI_lower, xmax = CI_upper), width = 0.2, linewidth = 1) +
  geom_vline(xintercept = 0, linetype = "dashed", linewidth = 0.5) +
  scale_color_manual(values = c("Positive" = "darkred", "Negative" = "darkblue")) +
  scale_y_discrete(labels = function(x) str_wrap(x, width = 10)) +
  theme_bw(base_size = 14) +
  theme(axis.text = element_text(color = "black"),
        strip.text = element_text(size = 14),
        legend.position = "none",
        axis.title = element_blank())
)

##s1:lv-based ordination####
# Site scores on the two latent variables of m1_s1 (the fit specified without
# a `formula`), joined with the sample-level treatment labels
lv_scores_s1 <- getLV(m1_s1)
site_scores_s1 <- data.frame(
  LV1 = lv_scores_s1[, 1],
  LV2 = lv_scores_s1[, 2],
  Treatment = CovBenthic[["Treatment"]],
  statusatsampling = CovBenthic[["statusatsampling"]],
  BACI = CovBenthic[["BACI"]],
  Site = rownames(lv_scores_s1)
)

# Do the LV scores differ between treatments? (permutation test on Euclidean distances)
adonis2(site_scores_s1[,1:2] ~ Treatment,
        data = site_scores_s1,
        method = "euclidean")

# Treatment centroids in latent-variable space.
# stat_summary(fun = mean) summarises y at each unique x value; with a
# continuous x it only re-draws every sample, so the centroids are computed
# explicitly and drawn as their own layer.
s1_lv_centroids <- aggregate(cbind(LV1, LV2) ~ Treatment,
                             data = site_scores_s1, FUN = mean)

# Ordination of sites on the two latent variables with 95% normal ellipses and
# treatment centroids (large diamonds).
(s1_ordplot <- ggplot(site_scores_s1, aes(x = LV1, y = LV2, color = Treatment)) +
  # type = "norm" draws a multivariate-normal ellipse
  stat_ellipse(aes(group = Treatment), type = "norm", level = 0.95, linetype = 2,
               linewidth = 0.6, alpha = 0.7) +
  geom_point(alpha = 0.7, size = 2) +
  geom_point(data = s1_lv_centroids, shape = 18, size = 4, show.legend = FALSE) +
  scale_color_manual(
    name = "Treatment",
    values = c(
      "Before_Impact" = "salmon","After_Impact" = "darkred",
      "Before_Control" = "skyblue","After_Control" = "darkblue"
    ),
    labels = c(
      "Before_Impact" = "Before Impact","After_Impact" = "After Impact",
      "Before_Control" = "Before Control","After_Control" = "After Control"
    )
  ) +
  labs(x = "Latent Variable 1", y = "Latent Variable 2") +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.background = element_rect(fill = "white", color = "white"),
    legend.key = element_rect(fill = "white"),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray40"),
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 10, color = "black"),
    panel.grid.major = element_line(color = "gray90", linewidth = 0.3),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", linewidth = 0.8),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white")
  ) +
  guides(color = guide_legend(nrow = 2))
)

# Arguments are named in full instead of relying on partial matching of `file`
ggsave(filename = "D:/UoE/Writings/Data Chapter 2/LO/revisions/coding/s1ordplot.png",
       plot = s1_ordplot, height = 4, width = 4, dpi = 600, bg = "white")

##s1:permanova####
# Bray-Curtis dissimilarities between samples
dist_bray <- vegdist(SpecBenthic, method = "bray")

# Overall PERMANOVA on Treatment; permutations are restricted within TargetMMS
perm <- adonis2(
  dist_bray ~ Treatment,
  data = CovBenthic,
  permutations = 999,
  strata = CovBenthic$TargetMMS
)
perm

##s1:permanova pairwise comparisons####
# One PERMANOVA per pair of treatments (permutations restricted within
# TargetMMS), followed by Benjamini-Hochberg adjustment of the p-values.
treatments <- unique(CovBenthic$Treatment)
length(treatments)
pairwise_results <- list()

if (length(treatments) >= 2) {
  # Convert the distance object once instead of once per pair
  dist_bray_mat <- as.matrix(dist_bray)
  # combn() enumerates pairs in the same (i < j) order as a nested index loop
  treatment_pairs <- combn(as.character(treatments), 2, simplify = FALSE)
  for (k in seq_along(treatment_pairs)) {
    tr1 <- treatment_pairs[[k]][1]
    tr2 <- treatment_pairs[[k]][2]
    # Samples belonging to this treatment pair
    pair_idx <- CovBenthic$Treatment %in% c(tr1, tr2)
    pair_data <- CovBenthic[pair_idx, ]
    # Matching sub-block of the Bray-Curtis matrix
    pair_dist <- as.dist(dist_bray_mat[pair_idx, pair_idx])
    result <- adonis2(pair_dist ~ Treatment,
                      data = pair_data,
                      strata = pair_data$TargetMMS,
                      permutations = 999)
    pairwise_results[[paste(tr1, "vs", tr2)]] <- result
  }
}

# First row of each result table = the Treatment term.
# vapply() keeps the result type fixed even when there are no pairs.
p_values <- vapply(pairwise_results, function(x) x$`Pr(>F)`[1], numeric(1))
f_values <- vapply(pairwise_results, function(x) x$F[1], numeric(1))
r2_values <- vapply(pairwise_results, function(x) x$R2[1], numeric(1))
df_values <- vapply(pairwise_results, function(x) x$Df[1], numeric(1))

# Benjamini-Hochberg p-value adjustment
adjusted_p <- p.adjust(p_values, method = "BH")

# Summary table; significance codes use left-closed intervals so that
# p < 0.001 -> "***", p < 0.01 -> "**", p < 0.05 -> "*", p < 0.1 -> ".", else "ns"
s1_permanova_result <- data.frame(
  Comparison = as.character(names(pairwise_results)),
  Df = df_values,
  SumOfSqs = vapply(pairwise_results, function(x) x$SumOfSqs[1], numeric(1)),
  F_value = round(f_values, 4),
  R2 = round(r2_values, 4),
  P_value = round(p_values, 4),
  Adj_P_value = round(adjusted_p, 4),
  Significance = as.character(cut(adjusted_p,
                                  breaks = c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
                                  labels = c("***", "**", "*", ".", "ns"),
                                  right = FALSE))
)

# Homogeneity of multivariate dispersion between treatments: permutation test,
# ANOVA and Tukey pairwise comparisons on the same betadisper fit
s1_betadisper <- betadisper(dist_bray, CovBenthic$Treatment)
permutest(s1_betadisper) # author's note: signif (violates PERMANOVA assumption)
anova(s1_betadisper)
TukeyHSD(s1_betadisper)

##s1:pcoa####
# Principal coordinates analysis of the Bray-Curtis distances (first two axes)
pcoa_res <- cmdscale(dist_bray, eig = TRUE, k = 2)
scores <- as.data.frame(pcoa_res$points)
colnames(scores) <- c("PCoA1", "PCoA2")
scores$Treatment <- CovBenthic$Treatment
scores$Method <- c("PCoA")

# Treatment centroids. stat_summary(fun = mean) summarises y at each unique x
# value, which for continuous coordinates just re-draws every sample, so the
# centroids are computed explicitly and drawn as their own layer.
s1_pcoa_centroids <- aggregate(cbind(PCoA1, PCoA2) ~ Treatment,
                               data = scores, FUN = mean)

# PCoA plot with 95% ellipses and treatment centroids (large diamonds); axis
# labels show each axis eigenvalue as a percentage of the eigenvalue sum
(s1_pcoa <- ggplot(scores, aes(x = PCoA1, y = PCoA2, color = Treatment, group = Treatment)) +
    stat_ellipse(aes(group = Treatment), level = 0.95, linetype = 2,
                 linewidth = 0.6, alpha = 0.7) +
    geom_point(alpha = 0.7, size = 2) +
    geom_point(data = s1_pcoa_centroids, shape = 18, size = 4, show.legend = FALSE) +
    scale_color_manual(
      name = "Treatment",
      values = c(
        "Before_Impact" = "salmon", "After_Impact" = "darkred",
        "Before_Control" = "skyblue", "After_Control" = "darkblue"
      ),
      labels = c(
        "Before_Impact" = "Before Impact", "After_Impact" = "After Impact",
        "Before_Control" = "Before Control", "After_Control" = "After Control"
      )
    ) +
    labs(
      x = paste0("PCoA1 (", round(pcoa_res$eig[1] / sum(pcoa_res$eig) * 100, 1), "%)"),
      y = paste0("PCoA2 (", round(pcoa_res$eig[2] / sum(pcoa_res$eig) * 100, 1), "%)")
    ) +
    theme_bw(base_size = 12) +
    theme(
      legend.position = "bottom",
      legend.title = element_blank(),
      legend.text = element_text(size = 12),
      legend.background = element_rect(fill = "white", color = "white"),
      legend.key = element_rect(fill = "white"),
      plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
      plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray40"),
      axis.title = element_text(face = "bold", size = 12),
      axis.text = element_text(size = 10, color = "black"),
      panel.grid.major = element_line(color = "gray90", linewidth = 0.3),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "black", linewidth = 0.8),
      plot.background = element_rect(fill = "white"),
      panel.background = element_rect(fill = "white")
    ) +
    guides(color = guide_legend(nrow = 2))
)

##s1:pcoa paired centroid distance####
# Before period: Euclidean distance between the Impact and Control centroids
bibc_scores <- scores[scores$Treatment %in% c("Before_Impact", "Before_Control"), ]
bibc_centroids <- aggregate(cbind(PCoA1, PCoA2) ~ Treatment,
                            data = bibc_scores, FUN = mean)
bibc_centroids_dist <- as.matrix(dist(bibc_centroids[, 2:3], method = "euclidean"))
bibc_centroids_dist[2]
dist(bibc_centroids[, 2:3])[1] # 0.1375 recorded by the author

# After period: Euclidean distance between the Impact and Control centroids
aiac_scores <- scores[scores$Treatment %in% c("After_Impact", "After_Control"), ]
aiac_centroids <- aggregate(cbind(PCoA1, PCoA2) ~ Treatment,
                            data = aiac_scores, FUN = mean)
aiac_centroids_dist <- as.matrix(dist(aiac_centroids[, 2:3], method = "euclidean"))
dist(aiac_centroids[, 2:3])[1] # 0.2358 recorded by the author

#' Bootstrap the Euclidean distance between two treatment centroids
#'
#' For each replicate, the rows of each of the two treatments are resampled
#' with replacement (within treatment), the centroid of the first two columns
#' is recomputed per treatment, and the distance between the two centroids is
#' stored.
#'
#' @param scores Data frame whose first two columns hold the ordination
#'   coordinates and which has a `Treatment` column.
#' @param impact_label,control_label Values of `scores$Treatment` identifying
#'   the two groups to compare; they must differ and both must be present.
#' @param nboot Number of bootstrap replicates.
#' @param seed Optional value passed to `set.seed()` for reproducible
#'   replicates; `NULL` (default) leaves the random-number state untouched.
#' @return Numeric vector of length `nboot` with the bootstrapped distances.
bootstrap_centroid <- function(scores, impact_label, control_label,
                               nboot = 999, seed = NULL) {
  if (identical(impact_label, control_label)) {
    stop("`impact_label` and `control_label` must differ.", call. = FALSE)
  }
  # which() drops NA treatment labels instead of producing all-NA rows
  impact_xy <- as.matrix(
    scores[which(scores$Treatment == impact_label), 1:2, drop = FALSE]
  )
  control_xy <- as.matrix(
    scores[which(scores$Treatment == control_label), 1:2, drop = FALSE]
  )
  n_impact <- nrow(impact_xy)
  n_control <- nrow(control_xy)
  if (n_impact == 0L || n_control == 0L) {
    stop("Both treatment labels must occur in `scores$Treatment`.", call. = FALSE)
  }
  if (!is.null(seed)) {
    set.seed(seed)
  }

  dists <- numeric(nboot)
  # seq_len() gives an empty loop for nboot = 0 (1:0 would iterate twice)
  for (b in seq_len(nboot)) {
    # Resample within each treatment: impact first, then control
    impact_centroid <- colMeans(
      impact_xy[sample.int(n_impact, replace = TRUE), , drop = FALSE]
    )
    control_centroid <- colMeans(
      control_xy[sample.int(n_control, replace = TRUE), , drop = FALSE]
    )
    dists[b] <- sqrt(sum((impact_centroid - control_centroid)^2))
  }
  dists
}
# Bootstrapped Impact-Control centroid distances, before and after.
# BIBC must compare Before_Impact with Before_Control (the call previously
# paired Before_Impact with After_Impact, which is not an Impact-vs-Control
# distance and does not match the BIBC label).
bibc_dist <- bootstrap_centroid(scores, "Before_Impact", "Before_Control")
bibc_dist_summary <- quantile(bibc_dist, probs = c(0.025, 0.5, 0.975))
aiac_dist <- bootstrap_centroid(scores, "After_Impact", "After_Control")
aiac_dist_summary <- quantile(aiac_dist, probs = c(0.025, 0.5, 0.975))

# Change in Impact-Control separation (after minus before), replicate by replicate
diff_dist <- aiac_dist - bibc_dist
mean(diff_dist < 0) # proportion of replicates where the after distance is smaller

# Mean and 95% percentile interval of each bootstrap distribution
boot_sets_s1 <- list(bibc_dist, aiac_dist, diff_dist)
s1_df <- data.frame(
  Comparison = c("BIBC", "AIAC", "AIAC - BIBC"),
  Mean = vapply(boot_sets_s1, mean, numeric(1)),
  Lower = vapply(boot_sets_s1, quantile, numeric(1), probs = 0.025, names = FALSE),
  Upper = vapply(boot_sets_s1, quantile, numeric(1), probs = 0.975, names = FALSE)
)
s1_df$Comparison <- factor(s1_df$Comparison, levels = c("BIBC", "AIAC", "AIAC - BIBC"))
(s1_cd <- ggplot(s1_df, aes(x = Comparison, y = Mean)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey40") +
  labs(x = NULL, y = "Centroid distance") +
  theme_bw(base_size = 14)
)

# Overall decommissioning effect: MANOVA of both latent-variable axes on Treatment
m_s1 <- manova(
  cbind(LV1, LV2) ~ Treatment,
  data = site_scores_s1
)
summary(m_s1) # author's note: NOT signif
# Author's note: as the overall test is not significant, no pairwise comparisons follow


#S3----
# MMS and sites: keep the North West Hutton rows of the decommissioning table
setwd("D:/UoE/Writings/Data Chapter 2/LO/revisions/coding")
df_MMS_s3 <- read.csv("df_decommeffect.csv") # author's note: 4 decomm MMS
df_MMS_s3 <- df_MMS_s3[df_MMS_s3$TargetMMS == "North West Hutton", ]

# Recode sampling status, derive BACI from distance (cut-off 500; author's
# note: 500 or 1500) and build a "<status> <BACI>" label.
# NOTE(review): "baseline" is not among the factor levels, so any
# PRECOMMISSIONING row becomes NA in statusatsampling - confirm this is intended.
df_MMS_s3 <- df_MMS_s3 %>%
  mutate(
    statusatsampling = factor(
      gsub("NOT IN USE", "After",
           gsub("ACTIVE", "Before",
                gsub("PRECOMMISSIONING", "baseline", statusatsampling))),
      levels = c("Before", "After")
    ),
    BACI = factor(
      if_else(distance <= 500, "Impact", "Control"),
      levels = c("Impact", "Control")
    ),
    Treatment = paste(statusatsampling, BACI)
  )

# Attach the species records to the sites.
# NOTE(review): `spp_df` is not created anywhere in the visible part of this
# script; it must already exist in the session - confirm where it comes from.
df_s3 <- df_MMS_s3 %>%
  left_join(spp_df, by = "SITE") %>%
  select(SITE, TargetMMS, BACI, statusatsampling, distance, ValidName, MEAN) %>%
  distinct()
head(df_s3)

# Taxonomic lookup table
setwd("D:/UoE/Writings/Data Chapter 2/LO/revisions/coding")
spptaxa <- read.csv("speciesrank.csv")
##s3:cleaned spp list####
# Keep records ranked species/genus/family, sum MEAN per site and cleaned
# name (NewValidName), then spread to one column per name with missing
# combinations filled with 0.
df_s3_clean <- df_s3 %>%
  left_join(spptaxa, by = "ValidName") %>%
  filter(Rank %in% c("species", "genus", "family")) %>%
  dplyr::select(-ValidName, -Rank) %>%
  group_by(SITE, TargetMMS, BACI, statusatsampling, distance, NewValidName) %>%
  dplyr::summarise(MEAN = sum(MEAN), .groups = "drop") %>%
  pivot_wider(names_from = "NewValidName",
              values_from = "MEAN") %>%
  mutate(across(where(is.numeric), function(col) replace_na(col, 0))) %>%
  arrange(SITE)

# s3_sppabundance <- df_s3_clean
# save(s3_sppabundance,file="D:/UoE/Writings/Data Chapter 2/LO/revisions/coding/Essex data repository/s3_sppabundance.Rdata")

# Number of sites in every BACI x sampling-status cell
dplyr::summarise(
  group_by(df_s3_clean, BACI, statusatsampling),
  n = n()
)

# Species columns = every column that is not one of the site-level identifier
# columns used to build df_s3_clean. Deriving them by name avoids a hard-coded
# column range (previously 6:271) that breaks when the species list changes.
s3_id_cols <- c("SITE", "TargetMMS", "BACI", "statusatsampling", "distance")
SpecNames <- setdiff(colnames(df_s3_clean), s3_id_cols)
SpecNames

# Occurrence frequency: number of sites in which each species has abundance > 0
HowOften <- colSums(df_s3_clean[, SpecNames] > 0)
HowOften
plot(HowOften, type = "h")

#' Rare-species filter: keep species recorded (abundance > 0) in at least
#' `min_sites` sites.
min_sites <- 5
NotTheseSpecies <- SpecNames[HowOften < min_sites]
NotTheseSpecies

#' Drop the rare species. all_of() replaces the superseded one_of(); every name
#' comes from the table's own column names, so strict matching is safe.
df_s3_1 <- dplyr::select(df_s3_clean, -all_of(NotTheseSpecies))

#' Species retained for the analysis
NewSpeciesNames <- SpecNames[!SpecNames %in% NotTheseSpecies]
NewSpeciesNames
length(NewSpeciesNames)  # 115 recorded by the author

# Number of species used in the analysis
Np <- length(NewSpeciesNames)
Np

#' Percentage of sites in which each retained species is absent (abundance == 0)
zero_counts_s3 <- colSums(df_s3_1[, NewSpeciesNames] == 0)
BenthicZeros <- 100 * zero_counts_s3 / nrow(df_s3_1)

#' One row per species: its name and its zero percentage
df <- data.frame(
  percent_zeros = BenthicZeros,
  species       = names(BenthicZeros)
)

#' Horizontal bar chart of the zero percentages
ggplot(df, aes(x = percent_zeros, y = species)) +
  geom_col(width = 0.2) +
  labs(
    x = "Percentage of Zeros",
    y = "Species"
  ) +
  theme_minimal() +
  xlim(0, 100) +
  theme(axis.text.y = element_text(size = 5))
# Author's note: most species show an extremely high proportion of zeros

# s3: data preparation
# Response: site x species abundance matrix of the retained species
SpecBenthic <- as.matrix(df_s3_1[, NewSpeciesNames])
class(SpecBenthic) # matrix array
head(SpecBenthic)

# Covariates. Author's note: TargetMMS is left out because it has a single
# level here (gllvm doesn't like it).
CovBenthic <- df_s3_1[, c("BACI", "statusatsampling")]
CovBenthic$Treatment <- with(
  CovBenthic,
  paste(statusatsampling, BACI, sep = "_")
)

# s3: GLLVM fits (negative binomial).
# m0: Treatment as fixed effect + 2 latent variables
m0_s3 <- gllvm(SpecBenthic, CovBenthic,
               family = "negative.binomial",
               num.lv = 2,
               sd.errors = TRUE,
               formula = ~ Treatment,
               control.start = list(starting.val = "random"),
               control = list(optimizer = "optim", method = "BFGS", maxit = 20000),
               seed = 1234)

# m1 / m2: latent variables only (2 and 1 respectively), no covariates.
# NOTE(review): when X is supplied without `formula`, gllvm() uses every column
# of X as a predictor, so passing CovBenthic here would not give covariate-free
# models. X is therefore omitted; results derived from the previous fits
# should be re-checked.
m1_s3 <- gllvm(SpecBenthic,
               family = "negative.binomial",
               num.lv = 2,
               sd.errors = TRUE,
               control.start = list(starting.val = "random"),
               control = list(optimizer = "optim", method = "BFGS", maxit = 20000),
               seed = 1234)
m2_s3 <- gllvm(SpecBenthic,
               family = "negative.binomial",
               num.lv = 1,
               sd.errors = TRUE,
               control.start = list(starting.val = "random"),
               control = list(optimizer = "optim", method = "BFGS", maxit = 20000),
               seed = 1234)

AIC(m0_s3, m1_s3) # author's note from earlier fits: Treatment is an important fixed term
AIC(m1_s3, m2_s3) # author's note from earlier fits: 1 latent variable is better

# m2_s3 is saved as well: it is used further down (1D ordination) and would be
# missing in a session that only reloads this file.
s3_model_file <- "D:/UoE/Writings/Data Chapter 2/LO/revisions/coding/s3_gllvm.Rdata"
save(m0_s3, m1_s3, m2_s3, file = s3_model_file)
load(s3_model_file)

# Diagnostic plots (author's note: perfect!)
# Close a leftover graphics device only if one is open; dev.off() on the null
# device raises an error and would stop a sourced run.
if (dev.cur() > 1L) {
  dev.off()
}
par(mfrow = c(1, 3))
meanY <- apply(SpecBenthic, 2, mean)
varY <- apply(SpecBenthic, 2, var)
plot(log(meanY), varY, log = "y", main = "Species mean-variance relationship")
# Reference curves: var = mean (solid) and var = mean + mean^2 (dashed red)
points(log(sort(meanY)), sort(meanY), type = "l")
points(log(sort(meanY)), sort(meanY + 1 * meanY^2), type = "l", col = 2, lty = 2)
legend("bottomright", lty = 1:2, legend = c("var = mean", "var = mean+phi*mu^2"), bty = "n", col = 1:2)

plot(m0_s3, which = 1:2, var.colors = 1, n.plot = 100)
dev.off()

##s3:bioindicators####
# The diagnostics section already closes its device, so only call dev.off()
# when a device is actually open (it errors on the null device).
if (dev.cur() > 1L) {
  dev.off()
}
gllvm::coefplot(m0_s3, cex.ylab = 0.5, y.label = TRUE, which.X = 1)

# Coefficient table of m0_s3; the 4th column (p-value) is renamed to `pval`
coefs <- data.frame(summary(m0_s3)$Coef.table)
colnames(coefs)[4] <- c("pval")
coefs_signif <- coefs[coefs$pval < 0.05, ]

# Row names are split at the first ":" into a treatment part and a species part
coefs_signif <- coefs_signif %>%
  mutate(
    Rowname = rownames(.),
    Treatment = str_split_fixed(Rowname, ":", 2)[, 1],
    Treatment = gsub("Treatment", "", Treatment),
    Treatment = gsub("_", " ", Treatment),
    Species = str_split_fixed(Rowname, ":", 2)[, 2],
    Directionality = ifelse(Estimate > 0, "Positive", "Negative"),
    # Wald-type 95% interval
    CI_lower = Estimate - 1.96 * Std..Error,
    CI_upper = Estimate + 1.96 * Std..Error
  ) %>%
  # significant species for this treatment are treated as bioindicators
  filter(Treatment == "After Impact")

# Coefficient plot of all significant "After Impact" species: point estimate
# with 95% interval, ordered by estimate and coloured by sign.
# Line widths use `linewidth` (the `size` aesthetic for lines is deprecated in
# ggplot2 >= 3.4.0 and `linewidth` is already used elsewhere in this script).
(s3_biospp <- ggplot(coefs_signif,
                     aes(x = Estimate, y = reorder(Species, Estimate),
                         color = Directionality)) +
    geom_point(size = 3) +
    geom_errorbar(aes(xmin = CI_lower, xmax = CI_upper), width = 0.2, linewidth = 1) +
    geom_vline(xintercept = 0, linetype = "dashed", linewidth = 0.5) +
    scale_color_manual(values = c("Positive" = "darkred", "Negative" = "darkblue")) +
    theme_bw(base_size = 14) +
    theme(strip.text = element_text(size = 14),
          legend.position = "none",
          axis.title = element_blank(),
          axis.text = element_text(color = "black"))
)

# Arguments are named in full instead of relying on partial matching of `file`
ggsave(filename = "D:/UoE/Writings/Data Chapter 2/LO/revisions/coding/s3_allbiospp.png",
       plot = s3_biospp, bg = "white", width = 6, height = 6, dpi = 600)

# Three strongest positive and three strongest negative indicator species
coefs_top_positive <- coefs_signif %>%
  filter(Directionality == "Positive") %>%
  slice_max(order_by = Estimate, n = 3, with_ties = FALSE)

coefs_top_negative <- coefs_signif %>%
  filter(Directionality == "Negative") %>%
  slice_min(order_by = Estimate, n = 3, with_ties = FALSE)

coefs_top <- rbind(coefs_top_positive, coefs_top_negative)

# Same coefficient plot restricted to the top species; long species names are
# wrapped at 10 characters. `linewidth` replaces the deprecated `size` for lines.
(s3_biospp_top <- ggplot(coefs_top,
                         aes(x = Estimate, y = reorder(Species, Estimate),
                             color = Directionality)) +
    geom_point(size = 3) +
    geom_errorbar(aes(xmin = CI_lower, xmax = CI_upper), width = 0.2, linewidth = 1) +
    geom_vline(xintercept = 0, linetype = "dashed", linewidth = 0.5) +
    scale_color_manual(values = c("Positive" = "darkred", "Negative" = "darkblue")) +
    scale_y_discrete(labels = function(x) str_wrap(x, width = 10)) +
    theme_bw(base_size = 14) +
    theme(axis.text = element_text(color = "black"),
          strip.text = element_text(size = 14),
          legend.position = "none",
          axis.title = element_blank())
)

##s3:lv-based ordination####
# Author's note: DON'T USE THIS!!! (2D ordination)
# Site scores on the two latent variables of m1_s3, with treatment labels
lv_scores_s3 <- getLV(m1_s3)
site_scores_s3 <- data.frame(
  LV1 = lv_scores_s3[, 1],
  LV2 = lv_scores_s3[, 2],
  Treatment = CovBenthic[,c("Treatment")],
  statusatsampling = CovBenthic[,c("statusatsampling")],
  BACI = CovBenthic[,c("BACI")],
  Site = rownames(lv_scores_s3)
)

# Do the LV scores differ between treatments? (permutation test on Euclidean distances)
adonis2(site_scores_s3[,1:2] ~ Treatment, data = site_scores_s3, method = "euclidean")

# Treatment centroids. stat_summary(fun = mean) summarises y at each unique x
# value, which for continuous scores just re-draws every site, so the centroids
# are computed explicitly and drawn as their own layer (large diamonds).
s3_lv_centroids <- aggregate(cbind(LV1, LV2) ~ Treatment,
                             data = site_scores_s3, FUN = mean)

ggplot(site_scores_s3, aes(x = LV1, y = LV2, color = Treatment)) +
  stat_ellipse(aes(group = Treatment), level = 0.95, linetype = 2,
               linewidth = 0.6, alpha = 0.7) +
  geom_point(alpha = 0.7, size = 2) +
  geom_point(data = s3_lv_centroids, shape = 18, size = 4, show.legend = FALSE) +
  scale_color_manual(
    name = "Treatment",
    values = c(
      "Before_Impact" = "salmon","After_Impact" = "darkred",
      "Before_Control" = "skyblue","After_Control" = "darkblue"
    ),
    labels = c(
      "Before_Impact" = "Before Impact","After_Impact" = "After Impact",
      "Before_Control" = "Before Control","After_Control" = "After Control"
    )
  ) +
  labs(x = "Latent Variable 1", y = "Latent Variable 2") +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "right",
    legend.title = element_text(face = "bold", size = 11),
    legend.text = element_text(size = 10),
    legend.background = element_rect(fill = "white", color = "gray50"),
    legend.key = element_rect(fill = "white"),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray40"),
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 10, color = "black"),
    panel.grid.major = element_line(color = "gray90", linewidth = 0.3),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", linewidth = 0.8),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white")
  )

# Author's note: DON'T USE IT - 1D ordination
# The one-latent-variable scores get their own objects. Re-using
# `lv_scores_s3` / `site_scores_s3` here would replace the two-dimensional
# scores with one-column versions, and the MANOVA on LV1 and LV2 below (as well
# as any later use of the second LV column) would then fail.
lv_scores_s3_1d <- getLV(m2_s3)
site_scores_s3_1d <- data.frame(
  LV1 = lv_scores_s3_1d[, 1],
  Treatment = CovBenthic$Treatment
)

# Mean and standard error of LV1 per treatment
site_scores_s3_summary <- site_scores_s3_1d %>%
  group_by(Treatment) %>%
  summarise(
    mean_lv1 = mean(LV1),
    se_LV1 = sd(LV1) / sqrt(n()),
    .groups = "drop"
  )

# One-way ANOVA of LV1 on Treatment with Tukey pairwise comparisons
s3_lv1anova <- aov(LV1 ~ Treatment, data = site_scores_s3_1d)
summary(s3_lv1anova)
TukeyHSD(s3_lv1anova) # author's note: no major changes across paired groups

head(site_scores_s3_summary)
ggplot(site_scores_s3_1d, aes(x = Treatment, y = LV1, color = Treatment)) +
  geom_point(size = 3, alpha = 0.7) +
  theme_bw(base_size = 14) +
  labs(x = NULL) +
  theme(legend.position = "right",
        axis.text = element_text(color = "black"))


# Overall decommissioning effect: MANOVA on the two-dimensional site scores
# (site_scores_s3 is built from m1_s3 in the 2D ordination section)
m_s3 <- manova(cbind(LV1, LV2) ~ Treatment, data = site_scores_s3)
summary(m_s3) # author's note: NOT signif


# NMDS (Bray-Curtis, 2 axes) next to the two-dimensional GLLVM ordination.
nmds_s3 <- metaMDS(SpecBenthic, distance = "bray", k = 2, trymax = 100)
nmds_s3df <- data.frame(
  NMDS1 = nmds_s3$points[, 1],
  NMDS2 = nmds_s3$points[, 2],
  Treatment = CovBenthic$Treatment,
  Method = "NMDS"
)

# The GLLVM panel needs two latent variables, so the scores are taken directly
# from the two-LV model m1_s3. `lv_scores_s3` may hold the one-column scores of
# the 1D model at this point, in which case `[, 2]` is out of bounds.
lv_scores_s3_2d <- getLV(m1_s3)
gllvm_s3df <- data.frame(
  # columns are named NMDS1/NMDS2 only so that both methods can be stacked
  NMDS1 = lv_scores_s3_2d[, 1],
  NMDS2 = lv_scores_s3_2d[, 2],
  Treatment = CovBenthic$Treatment,
  Method = "GLLVM"
)
combined_scores <- rbind(nmds_s3df, gllvm_s3df)

# One facet per method, free scales because the two ordinations are not on a
# common scale
ggplot(combined_scores, aes(x = NMDS1, y = NMDS2, color = Treatment)) +
  geom_point(size = 2, alpha = 0.7) +
  stat_ellipse(aes(group = Treatment), level = 0.95) +
  facet_wrap(~Method, scales = "free") +
  scale_color_manual(values = c("Before_Impact" = "#E31A1C", "After_Impact" = "gold2",
                                "Before_Control" = "#1F78B4", "After_Control" = "#33A02C")) +
  theme_bw()

# Alternative route: PERMANOVA and PCoA, both working on the raw dissimilarities

##s3:permanova####
# Bray-Curtis dissimilarities between sites
dist_bray <- vegdist(SpecBenthic, method = "bray")

# Overall PERMANOVA on Treatment (unrestricted permutations)
perm <- adonis2(
  dist_bray ~ Treatment,
  permutations = 9999,
  data = CovBenthic
)
perm

##s3:permanova pairwise comparisons####
# One PERMANOVA per pair of treatments, followed by Benjamini-Hochberg
# adjustment of the p-values.
treatments <- unique(CovBenthic$Treatment)
length(treatments)
pairwise_results <- list()

if (length(treatments) >= 2) {
  # Convert the distance object once instead of once per pair
  dist_bray_mat <- as.matrix(dist_bray)
  # combn() enumerates pairs in the same (i < j) order as a nested index loop
  treatment_pairs <- combn(as.character(treatments), 2, simplify = FALSE)
  for (k in seq_along(treatment_pairs)) {
    tr1 <- treatment_pairs[[k]][1]
    tr2 <- treatment_pairs[[k]][2]
    # Sites belonging to this treatment pair
    pair_idx <- CovBenthic$Treatment %in% c(tr1, tr2)
    pair_data <- CovBenthic[pair_idx, ]
    # Matching sub-block of the Bray-Curtis matrix
    pair_dist <- as.dist(dist_bray_mat[pair_idx, pair_idx])
    result <- adonis2(pair_dist ~ Treatment,
                      data = pair_data,
                      permutations = 9999)
    pairwise_results[[paste(tr1, "vs", tr2)]] <- result
  }
}

# First row of each result table = the Treatment term.
# vapply() keeps the result type fixed even when there are no pairs.
p_values <- vapply(pairwise_results, function(x) x$`Pr(>F)`[1], numeric(1))
f_values <- vapply(pairwise_results, function(x) x$F[1], numeric(1))
r2_values <- vapply(pairwise_results, function(x) x$R2[1], numeric(1))
df_values <- vapply(pairwise_results, function(x) x$Df[1], numeric(1))

# Benjamini-Hochberg p-value adjustment
adjusted_p <- p.adjust(p_values, method = "BH")

# Summary table; significance codes use left-closed intervals so that
# p < 0.001 -> "***", p < 0.01 -> "**", p < 0.05 -> "*", p < 0.1 -> ".", else "ns"
s3_permanova_result <- data.frame(
  Comparison = as.character(names(pairwise_results)),
  Df = df_values,
  SumOfSqs = vapply(pairwise_results, function(x) x$SumOfSqs[1], numeric(1)),
  F_value = round(f_values, 4),
  R2 = round(r2_values, 4),
  P_value = round(p_values, 4),
  Adj_P_value = round(adjusted_p, 4),
  Significance = as.character(cut(adjusted_p,
                                  breaks = c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
                                  labels = c("***", "**", "*", ".", "ns"),
                                  right = FALSE))
)

# Homogeneity of multivariate dispersion between treatments: permutation test,
# ANOVA and Tukey pairwise comparisons on the same betadisper fit
s3_betadisper <- betadisper(dist_bray, CovBenthic$Treatment)
permutest(s3_betadisper) # author's note: not signif (meets PERMANOVA assumption)
anova(s3_betadisper)
TukeyHSD(s3_betadisper)

##s3:pcoa####
# Principal coordinates analysis of the Bray-Curtis distances (first two axes)
pcoa_res <- cmdscale(dist_bray, eig = TRUE, k = 2)
scores <- as.data.frame(pcoa_res$points)
colnames(scores) <- c("PCoA1", "PCoA2")
scores$Treatment <- CovBenthic$Treatment
scores$Method <- c("PCoA")

# Treatment centroids. stat_summary(fun = mean) summarises y at each unique x
# value, which for continuous coordinates just re-draws every site, so the
# centroids are computed explicitly and drawn as their own layer.
s3_pcoa_centroids <- aggregate(cbind(PCoA1, PCoA2) ~ Treatment,
                               data = scores, FUN = mean)

# PCoA plot with 95% ellipses and treatment centroids (large diamonds); axis
# labels show each axis eigenvalue as a percentage of the eigenvalue sum
(s3_pcoa <- ggplot(scores, aes(x = PCoA1, y = PCoA2, color = Treatment, group = Treatment)) +
  stat_ellipse(aes(group = Treatment), level = 0.95, linetype = 2,
               linewidth = 0.6, alpha = 0.7) +
  geom_point(alpha = 0.7, size = 2) +
  geom_point(data = s3_pcoa_centroids, shape = 18, size = 4, show.legend = FALSE) +
  scale_color_manual(
    name = "Treatment",
    values = c(
      "Before_Impact" = "salmon", "After_Impact" = "darkred",
      "Before_Control" = "skyblue", "After_Control" = "darkblue"
    ),
    labels = c(
      "Before_Impact" = "Before Impact", "After_Impact" = "After Impact",
      "Before_Control" = "Before Control", "After_Control" = "After Control"
    )
  ) +
  labs(
    x = paste0("PCoA1 (", round(pcoa_res$eig[1] / sum(pcoa_res$eig) * 100, 1), "%)"),
    y = paste0("PCoA2 (", round(pcoa_res$eig[2] / sum(pcoa_res$eig) * 100, 1), "%)")
  ) +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.background = element_rect(fill = "white", color = "white"),
    legend.key = element_rect(fill = "white"),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray40"),
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 10, color = "black"),
    panel.grid.major = element_line(color = "gray90", linewidth = 0.3),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", linewidth = 0.8),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white")
  ) +
  guides(color = guide_legend(nrow = 2))
)

# Arguments are named in full instead of relying on partial matching of `file`
ggsave(filename = "D:/UoE/Writings/Data Chapter 2/LO/revisions/coding/s3PCoA.png",
       plot = s3_pcoa, height = 4, width = 4, dpi = 600, bg = "white")

##s3:pcoa paired centroid distance####
# Before period: Euclidean distance between the Impact and Control centroids
bibc_scores <- scores[scores$Treatment %in% c("Before_Impact", "Before_Control"), ]
bibc_centroids <- aggregate(cbind(PCoA1, PCoA2) ~ Treatment,
                            data = bibc_scores, FUN = mean)
bibc_centroids_dist <- as.matrix(dist(bibc_centroids[, 2:3], method = "euclidean"))
bibc_centroids_dist[2]
dist(bibc_centroids[, 2:3])[1] # 0.6404947 recorded by the author

# After period: Euclidean distance between the Impact and Control centroids
aiac_scores <- scores[scores$Treatment %in% c("After_Impact", "After_Control"), ]
aiac_centroids <- aggregate(cbind(PCoA1, PCoA2) ~ Treatment,
                            data = aiac_scores, FUN = mean)
aiac_centroids_dist <- as.matrix(dist(aiac_centroids[, 2:3], method = "euclidean"))
dist(aiac_centroids[, 2:3])[1] # 0.1890768 recorded by the author

bibc_dist <- bootstrap_centroid(scores,"Before_Impact","After_Impact")
bibc_dist_summary <- quantile(bibc_dist,probs = c(0.025,0.5,0.975)) 
aiac_dist <- bootstrap_centroid(scores,"After_Impact","After_Control")
aiac_dist_summary <- quantile(aiac_dist,probs = c(0.025,0.5,0.975)) 

diff_dist <- aiac_dist - bibc_dist
mean(diff_dist < 0) # proportion of after distance smaller than before distance

s3_df <- data.frame(
  Comparison = c("BIBC", "AIAC", "AIAC - BIBC"),
  Mean = c(mean(bibc_dist), mean(aiac_dist), mean(diff_dist)),
  Lower = c(quantile(bibc_dist, 0.025), quantile(aiac_dist, 0.025), quantile(diff_dist, 0.025)),
  Upper = c(quantile(bibc_dist, 0.975), quantile(aiac_dist, 0.975), quantile(diff_dist, 0.975))
)
s3_df$Comparison <- factor(s3_df$Comparison,levels=c("BIBC","AIAC","AIAC - BIBC"))
(s3_cd <- ggplot(s3_df, aes(x = Comparison, y = Mean)) +
  geom_point(size=3) +
  geom_errorbar(aes(ymin=Lower, ymax=Upper), width=0.2) +
  geom_hline(yintercept=0, linetype="dashed", colour="grey40") +
  labs(x=NULL,y=NULL) +
  theme_bw(base_size=14)
)


# Give the two PCoA axis columns the same names as `combined_scores` (built
# elsewhere in this file) so the two tables can be stacked row-wise.
names(scores)[1:2] <- c("NMDS1", "NMDS2")
combined_scores2 <- rbind(combined_scores, scores)

# One panel per ordination method, each with its own axis ranges; points and
# 95% ellipses are coloured by treatment.
ggplot(
  data = combined_scores2,
  mapping = aes(x = NMDS1, y = NMDS2, color = Treatment)
) +
  geom_point(size = 2, alpha = 0.7) +
  stat_ellipse(aes(group = Treatment), level = 0.95) +
  scale_color_manual(
    values = c(
      "Before_Impact" = "#E31A1C",
      "After_Impact" = "gold2",
      "Before_Control" = "#1F78B4",
      "After_Control" = "#33A02C"
    )
  ) +
  facet_wrap(~Method, scales = "free") +
  theme_bw()


##s3:raw spp list####
# Build the site x taxon abundance table for scenario 3 without filtering on
# taxonomic rank: attach the taxon lookup, sum MEAN within each
# site/taxon combination, spread taxa into columns and fill gaps with 0.
df_s3_raw <- df_s3 %>%
  left_join(spptaxa, by = "ValidName") %>%
  # Rank filter intentionally disabled for the "raw" list:
  #filter(Rank %in% c("species","genus","family")) %>%
  dplyr::select(-ValidName, -Rank) %>%
  group_by(SITE, TargetMMS, BACI, statusatsampling, distance, NewValidName) %>%
  dplyr::summarise(MEAN = sum(MEAN), .groups = "drop") %>%
  pivot_wider(
    names_from = "NewValidName",
    values_from = "MEAN"
  ) %>%
  mutate(across(where(is.numeric), function(x) replace_na(x, 0))) %>%
  arrange(SITE)

# Number of rows in each BACI x sampling-status cell.
df_s3_raw %>%
  group_by(BACI, statusatsampling) %>%
  dplyr::summarise(n = n())

# Taxon columns of the scenario 3 table. The column range is hard-coded;
# NOTE(review): confirm 6:284 still spans exactly the taxon columns whenever
# the input data change.
SpecNames <- colnames(df_s3_raw[, 6:284]) #271 for df_s3_clean
SpecNames
# Number of rows (samples) in which each taxon has a non-zero abundance.
HowOften <- colSums(df_s3_raw[, SpecNames] > 0)
HowOften
plot(HowOften, type = "h")

#' Taxa recorded in fewer than 5 rows are treated as too rare and dropped.
NotTheseSpecies <- SpecNames[HowOften < 5]
NotTheseSpecies

#' Dump these species
#' (`all_of()` replaces the superseded `one_of()`; `dplyr::select` is
#' namespaced to match the rest of the file.)
df_s3_1 <- dplyr::select(df_s3_raw, -all_of(NotTheseSpecies))

#' And determine the new species names
NewSpeciesNames <- SpecNames[!SpecNames %in% NotTheseSpecies]
NewSpeciesNames
length(NewSpeciesNames)  #121

#Number of spp used in the analysis
Np <- length(NewSpeciesNames)
Np

#' Determine the % of zeros for each species:
BenthicZeros <- 100 * colSums(df_s3_1[, NewSpeciesNames] == 0) / nrow(df_s3_1)

#' Combine these % with the name of the species:
df <- data.frame(percent_zeros = BenthicZeros,
                 species       = names(BenthicZeros))

#' Plot the results.
ggplot(df, aes(x = percent_zeros, y = species)) +
  geom_bar(stat = "identity", width = 0.2) +
  labs(x = "Percentage of Zeros",
       y = "Species") +
  theme_minimal() + xlim(0, 100) +
  theme(axis.text.y = element_text(size = 5))
# Most of them exhibit an extremely high proportion of zeros

#s3:data preparation
# Response matrix: one column per retained taxon.
SpecBenthic <- as.matrix(df_s3_1[, NewSpeciesNames])
class(SpecBenthic) #matrix array
head(SpecBenthic)

# Covariates: the two design factors plus their combination, written as
# "<statusatsampling>_<BACI>".
CovBenthic <- df_s3_1[, c("BACI", "statusatsampling")]
CovBenthic$Treatment <- paste(CovBenthic$statusatsampling, CovBenthic$BACI, sep = "_")

# Per-taxon mean against variance, with two reference curves overlaid:
# var = mean (solid) and var = mean + mean^2 (dashed red).
meanY <- apply(SpecBenthic, 2, mean)
varY <- apply(SpecBenthic, 2, var)
plot(log(meanY), varY, log = "y", main = "Species mean-variance relationship")
lines(log(sort(meanY)), sort(meanY))
lines(log(sort(meanY)), sort(meanY + 1 * meanY^2), col = 2, lty = 2)
legend("bottomright",
       legend = c("var = mean", "var = mean+phi*mu^2"),
       lty = 1:2, col = 1:2, bty = "n")

# Negative-binomial GLLVMs with two latent variables for scenario 3.
# m0_s3 includes Treatment as a fixed effect; m1_s3 has no covariates.
# FIX: the control entry was misspelled `mehtod`; gllvm's documented name for
# the optim() method is `optim.method`, so the misspelled entry was never a
# recognised control option.
m0_s3 <- gllvm(SpecBenthic, CovBenthic,
               family = "negative.binomial",
               num.lv = 2,
               sd.errors = TRUE,
               formula = ~ Treatment,
               control.start = list(starting.val = "random"),
               control = list(optimizer = "optim", optim.method = "BFGS", maxit = 20000),
               seed = 1234)
m1_s3 <- gllvm(SpecBenthic, CovBenthic,
               family = "negative.binomial",
               num.lv = 2,
               sd.errors = TRUE,
               control.start = list(starting.val = "random"),
               control = list(optimizer = "optim", optim.method = "BFGS", maxit = 20000),
               seed = 1234)
# Compare the model with Treatment (m0) against the one without (m1).
AIC(m0_s3, m1_s3) #Treatment is an important fixed term

# Latent-variable site scores from the covariate-free model, combined with
# the design factors for plotting. `[[` is used so each column is extracted
# as a plain vector even when CovBenthic is a tibble.
lv_scores_s3 <- getLV(m1_s3)
site_scores_s3 <- data.frame(
  LV1 = lv_scores_s3[, 1],
  LV2 = lv_scores_s3[, 2],
  Treatment = CovBenthic[["Treatment"]],
  statusatsampling = CovBenthic[["statusatsampling"]],
  BACI = CovBenthic[["BACI"]],
  Site = rownames(lv_scores_s3)
)

# Close the current graphics device only if one is open; a bare dev.off()
# errors on the null device.
if (dev.cur() > 1) dev.off()
# NOTE(review): stat_summary() summarises y within each distinct x value, so
# on continuous LV scores these layers may not draw one centroid per
# treatment -- confirm the intended output.
ggplot(site_scores_s3, aes(x = LV1, y = LV2, color = Treatment)) +
  stat_ellipse(aes(group = Treatment), level = 0.95, linetype = 2,
               linewidth = 0.6, alpha = 0.7) +
  geom_point(alpha = 0.7, size = 2) +
  stat_summary(aes(group = Treatment), fun = mean, geom = "point",
               size = 2, stroke = 1) +
  scale_color_manual(
    name = "Treatment",
    values = c(
      "Before_Impact" = "salmon", "After_Impact" = "darkred",
      "Before_Control" = "skyblue", "After_Control" = "darkblue"
    ),
    labels = c(
      "Before_Impact" = "Before Impact", "After_Impact" = "After Impact",
      "Before_Control" = "Before Control", "After_Control" = "After Control"
    )
  ) +
  stat_summary(
    aes(group = Treatment), fun = mean) +
  labs(x = "Latent Variable 1", y = "Latent Variable 2") +
  theme_bw(base_size = 12) +
  # Line widths use `linewidth` (the `size` argument for lines is deprecated
  # since ggplot2 3.4.0, and `linewidth` is already used elsewhere in the file).
  theme(
    legend.position = "right",
    legend.title = element_text(face = "bold", size = 11),
    legend.text = element_text(size = 10),
    legend.background = element_rect(fill = "white", color = "gray50"),
    legend.key = element_rect(fill = "white"),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray40"),
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 10, color = "black"),
    panel.grid.major = element_line(color = "gray90", linewidth = 0.3),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", linewidth = 0.8),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white")
  )

#overall decommissioning effect
# MANOVA of the two latent-variable axes jointly against Treatment.
m_s3 <- manova(
  cbind(LV1, LV2) ~ Treatment,
  data = site_scores_s3
)
summary(m_s3) # NOT signif
# The overall test is not significant, so no pairwise comparisons follow.

#S2----
setwd("D:/UoE/Writings/Data Chapter 2/LO/revisions/coding")
df_MMS_s2 <- read.csv("df_decommtwice2.csv")

# Clean the scenario 2 metadata:
#  * keep non-negative distances;
#  * relabel the sampling status (factor() maps any value outside the three
#    listed levels to NA; the gsub() calls then coerce the column back to
#    character, so the result is a character column);
#  * classify samples as Impact (<= 500) or Control (> 500) by distance;
#  * compute months elapsed between decommissioning and sampling, using the
#    first day of each month for both dates.
# `lubridate::interval()` is namespaced like the other lubridate calls so the
# pipeline does not depend on lubridate being attached.
df_MMS_s2 <- df_MMS_s2 %>%
  filter(distance >= 0) %>%
  mutate(statusatsampling = factor(statusatsampling, levels = c("PRECOMMISSIONING", "ACTIVE", "NOT IN USE")),
         statusatsampling = gsub("PRECOMMISSIONING", "baseline", statusatsampling),
         statusatsampling = gsub("ACTIVE", "pre", statusatsampling),
         statusatsampling = gsub("NOT IN USE", "post", statusatsampling)) %>%
  mutate(BACI = case_when(distance <= 500 ~ "Impact",
                          distance > 500 ~ "Control"),
         BACI = factor(BACI, levels = c("Impact", "Control"))) %>%
  mutate(decommissioning_date = lubridate::make_date(year = decommissioningyr, month = decommissioningmonth, day = 1),
         sampling_date = lubridate::make_date(year = samplingyear, month = samplingmonth, day = 1),
         monthafterdecomm = lubridate::time_length(
           lubridate::interval(decommissioning_date, sampling_date), "months")) %>%
  filter(monthafterdecomm >= 0) #remove SW19001 which is a baseline for 15/23d-13

head(df_MMS_s2, 20)

# Per structure (TargetMMS) and sampling date: number of distinct sites in
# each BACI class. Dates with no Control sites are dropped, then within each
# structure the earliest remaining date is labelled "Early" and every other
# date "Later".
MMS_obs <- df_MMS_s2 %>%
  group_by(TargetMMS, sampling_date, BACI) %>%
  summarise(n_samps = n_distinct(SITE), .groups = "drop") %>%
  pivot_wider(values_from = n_samps, names_from = BACI) %>%
  filter(!is.na(Control)) %>%
  #filter(!(TargetMMS == "15/20b- 12" & sampling_date == "1996-06-01")) %>%
  arrange(TargetMMS, sampling_date) %>%
  group_by(TargetMMS) %>%
  # rows are date-ordered within each structure, so row 1 is the first survey
  mutate(
    Decommphase = ifelse(row_number() == 1, "Early", "Later")
  ) %>%
  ungroup()

# Attach the Early/Later phase label and the per-site taxon records
# (`spp_df`, defined elsewhere in this file) to the scenario 2 metadata,
# keeping only the columns needed downstream and removing duplicate rows.
df_s2 <- df_MMS_s2 %>%
  left_join(MMS_obs, by = c("TargetMMS", "sampling_date")) %>%
  dplyr::select(-Impact, -Control) %>%
  left_join(spp_df, by = "SITE") %>%
  select(
    SITE, TargetMMS, BACI, monthafterdecomm,
    Decommphase, ValidName, MEAN, distance
  ) %>%
  distinct()

# Taxon lookup table (joined on ValidName below).
setwd("D:/UoE/Writings/Data Chapter 2/LO/revisions/coding")
spptaxa <- read.csv("speciesrank.csv")
head(spptaxa)

# Site x taxon abundance table: keep records ranked species, genus or family,
# sum MEAN per site and taxon, spread taxa into columns and zero-fill.
df_s2 <- df_s2 %>%
  left_join(spptaxa, by = "ValidName") %>%
  dplyr::select(-ValidName) %>%
  filter(Rank %in% c("species", "genus", "family")) %>%
  dplyr::select(-Rank) %>%
  group_by(SITE, TargetMMS, BACI, Decommphase, distance, NewValidName) %>%
  summarise(MEAN = sum(MEAN), .groups = "drop") %>%
  pivot_wider(
    names_from = "NewValidName",
    values_from = "MEAN"
  ) %>%
  mutate(across(where(is.numeric), function(x) replace_na(x, 0))) %>%
  arrange(SITE)

#s2_sppabundance <- df_s2
#save(s2_sppabundance,file="D:/UoE/Writings/Data Chapter 2/LO/revisions/coding/Essex data repository/s2_sppabundance.Rdata")


# Number of rows in each BACI x decommissioning-phase cell.
df_s2 %>%
  group_by(BACI, Decommphase) %>%
  summarise(n = n())

# Taxon columns of the scenario 2 table. The column range is hard-coded.
SpecNames <- colnames(df_s2[, 6:833]) # check they are all spp
SpecNames
# Number of rows (samples) in which each taxon has a non-zero abundance.
HowOften <- colSums(df_s2[, SpecNames] > 0)
HowOften
plot(HowOften, type = "h")
summary(HowOften)

#' Taxa recorded in fewer than 25 rows are treated as too rare and dropped.
NotTheseSpecies <- SpecNames[HowOften < 25]
NotTheseSpecies

#' Dump these species
#' (`all_of()` replaces the superseded `one_of()`; `dplyr::select` is
#' namespaced to match the rest of the file.)
df_s2_1 <- dplyr::select(df_s2, -all_of(NotTheseSpecies))

#' And determine the new species names
NewSpeciesNames <- SpecNames[!SpecNames %in% NotTheseSpecies]
NewSpeciesNames
length(NewSpeciesNames)  #167

#Number of spp used in the analysis
Np <- length(NewSpeciesNames)
Np

#' Determine the % of zeros for each species:
BenthicZeros <- 100 * colSums(df_s2_1[, NewSpeciesNames] == 0) / nrow(df_s2_1)

#' Combine these % with the name of the species:
df <- data.frame(percent_zeros = BenthicZeros,
                 species       = names(BenthicZeros))

#' Plot the results.
ggplot(df, aes(x = percent_zeros, y = species)) +
  geom_bar(stat = "identity", width = 0.2) +
  labs(x = "Percentage of Zeros",
       y = "Species") +
  theme_minimal() + xlim(0, 100) +
  theme(axis.text.y = element_text(size = 5))
# Most of them exhibit an extremely high proportion of zeros

#s2:data preparation
# Response matrix: one column per retained taxon.
SpecBenthic <- as.matrix(df_s2_1[, NewSpeciesNames])

# Rows whose retained taxa all have zero abundance are removed from both the
# response matrix and the covariate table.
# FIX: rows are now selected by the indices to KEEP. The previous
# `CovBenthic[-remove_rows, ]` returns zero rows when `remove_rows` is empty
# (negative indexing with integer(0) selects nothing), which would silently
# desynchronise CovBenthic from SpecBenthic.
site_totals <- rowSums(SpecBenthic)
remove_rows <- which(site_totals == 0) #150L
keep_rows <- which(site_totals != 0)
SpecBenthic <- SpecBenthic[keep_rows, , drop = FALSE]
class(SpecBenthic) #matrix array
head(SpecBenthic)

# Covariates: design factors, structure ID and the combined treatment label
# "<Decommphase>_<BACI>", restricted to the same rows as SpecBenthic.
CovBenthic <- df_s2_1[, c("BACI", "Decommphase", "TargetMMS")]
CovBenthic$Treatment <- paste(CovBenthic$Decommphase, CovBenthic$BACI, sep = "_")
CovBenthic <- CovBenthic[keep_rows, ]

# Negative-binomial GLLVMs with two latent variables for scenario 2.
# m0_s2 includes Treatment as a fixed effect; m1_s2 has no covariates.
# FIX: gllvm's control list names the optim() method `optim.method`; the
# previous `method` entry is not a documented control option.
m0_s2 <- gllvm(SpecBenthic, CovBenthic,
               family = "negative.binomial",
               num.lv = 2,
               sd.errors = TRUE,
               formula = ~ Treatment,
               control.start = list(starting.val = "random"),
               control = list(optimizer = "optim", optim.method = "BFGS", maxit = 20000),
               seed = 1234)

m1_s2 <- gllvm(SpecBenthic, CovBenthic,
               family = "negative.binomial",
               num.lv = 2,
               sd.errors = TRUE,
               control.start = list(starting.val = "random"),
               control = list(optimizer = "optim", optim.method = "BFGS", maxit = 20000),
               seed = 1234)
# Compare the model with Treatment (m0) against the one without (m1).
AIC(m0_s2, m1_s2)
# Cache both fits in the working directory, then reload them from the
# project folder so later sections can start from the saved models.
save(m0_s2, m1_s2, file = "s2_gllvm.Rdata")
setwd("D:/UoE/Writings/Data Chapter 2/LO/revisions/coding")
load("s2_gllvm.Rdata")

#diagnostic plot
# Close the current graphics device only if one is open; a bare dev.off()
# errors on the null device.
if (dev.cur() > 1) dev.off()
par(mfrow = c(1, 3))
# Panel 1: per-taxon mean against variance, with reference curves
# var = mean (solid) and var = mean + mean^2 (dashed red).
meanY <- apply(SpecBenthic, 2, mean)
varY <- apply(SpecBenthic, 2, var)
plot(log(meanY), varY, log = "y", main = "Species mean-variance relationship")
points(log(sort(meanY)), sort(meanY), type = "l")
points(log(sort(meanY)), sort(meanY + 1 * meanY^2), type = "l", col = 2, lty = 2)
legend("bottomright", lty = 1:2, legend = c("var = mean", "var = mean+phi*mu^2"), bty = "n", col = 1:2)
#variance increases faster than the mean

# Panels 2-3: the first two gllvm diagnostic plots for the Treatment model.
plot(m0_s2, which = 1:2, var.colors = 1, n.plot = 100)
if (dev.cur() > 1) dev.off()

##s2:bioindicators####
# FIX: guard the device reset. The preceding section ends with dev.off(), so
# an unconditional dev.off() here hits the null device and stops the script.
if (dev.cur() > 1) dev.off()
gllvm::coefplot(m0_s2, cex.ylab = 0.5, y.label = TRUE, which.X = 1)

# Coefficient table of the Treatment model; the 4th column is renamed to
# `pval` and used as the p-value for filtering at 0.05.
coefs <- data.frame(summary(m0_s2)$Coef.table) #coefficients
colnames(coefs)[4] <- c("pval")
coefs_signif <- coefs[coefs$pval < 0.05, ]
# Row names have the form "<Treatment term>:<Species>"; split them, tidy the
# treatment label, and add the sign and a Wald 95% interval (+/- 1.96 SE).
coefs_signif <- coefs_signif %>%
  mutate(
    Rowname = rownames(.),
    Treatment = str_split_fixed(Rowname, ":", 2)[, 1],
    Treatment = gsub("Treatment", "", Treatment),
    Treatment = gsub("_", " ", Treatment),
    Species = str_split_fixed(Rowname, ":", 2)[, 2],
    Directionality = ifelse(Estimate > 0, "Positive", "Negative"),
    CI_lower = Estimate - 1.96 * Std..Error,
    CI_upper = Estimate + 1.96 * Std..Error
  ) %>%
  filter(Treatment == "Later Impact") #signif spp in this treatment as bioindicators

# Estimates with intervals for all significant Later Impact taxa. Line widths
# use `linewidth` (`size` for lines is deprecated since ggplot2 3.4.0).
(s2_biospp <- ggplot(coefs_signif, aes(x = Estimate, y = reorder(Species, Estimate),
                                       color = Directionality)) +
    geom_point(size = 3) +
    geom_errorbar(aes(xmin = CI_lower, xmax = CI_upper), width = 0.2, linewidth = 1) +
    geom_vline(xintercept = 0, linetype = "dashed", linewidth = 0.5) +
    scale_color_manual(values = c("Positive" = "darkred", "Negative" = "darkblue")) +
    theme_bw(base_size = 14) +
    theme(strip.text = element_text(size = 14),
          legend.position = "none",
          axis.title = element_blank())
)

# Three largest positive and three most negative significant coefficients
# (ties broken by row order, so exactly three rows each when available).
coefs_top_positive <- coefs_signif %>%
  filter(Directionality == "Positive") %>%
  slice_max(order_by = Estimate, n = 3, with_ties = FALSE)

coefs_top_negative <- coefs_signif %>%
  filter(Directionality == "Negative") %>%
  slice_min(order_by = Estimate, n = 3, with_ties = FALSE)

coefs_top <- rbind(coefs_top_positive, coefs_top_negative)
# Rows 2 and 3 are relabelled by POSITION with hard-coded names; inspect the
# printed values first because the row order depends on the fitted estimates.
coefs_top$Species[2:3] #check if codes change
coefs_top$Species[2] <- c("Phascolion strombus")
coefs_top$Species[3] <- c("Leucon nasica")

head(coefs_top)
# Same layout as the full bioindicator plot, on a pseudo-log x axis with
# wrapped species labels. Line widths use `linewidth` (`size` for lines is
# deprecated since ggplot2 3.4.0).
(s2_biospp_top <- ggplot(coefs_top, aes(x = Estimate, y = reorder(Species, Estimate),
                                        color = Directionality)) +
    geom_point(size = 3) +
    geom_errorbar(aes(xmin = CI_lower, xmax = CI_upper), width = 0.2, linewidth = 1) +
    geom_vline(xintercept = 0, linetype = "dashed", linewidth = 0.5) +
    scale_x_continuous(
      trans = "pseudo_log",  # Handles zeros and negative values
      breaks = c(-10, 0, 10, 30000),
      labels = scales::comma_format()
    ) +
    scale_color_manual(values = c("Positive" = "darkred", "Negative" = "darkblue")) +
    scale_y_discrete(labels = function(x) str_wrap(x, width = 10)) +
    theme_bw(base_size = 14) +
    theme(axis.text = element_text(color = "black")) +
    theme(strip.text = element_text(size = 14),
          legend.position = "none",
          axis.title = element_blank())
)

##s2:lv-based ordination####
# Latent-variable site scores from the covariate-free model, combined with
# the design factors for plotting.
# FIX: columns are extracted with `[[`. CovBenthic is a tibble, so
# `CovBenthic[, "Decommphase"]` is a one-column tibble and data.frame() then
# keeps the inner column name, i.e. the column declared as
# `statusatsampling` was actually created as `Decommphase`.
lv_scores_s2 <- getLV(m1_s2)
site_scores_s2 <- data.frame(
  LV1 = lv_scores_s2[, 1],
  LV2 = lv_scores_s2[, 2],
  Treatment = CovBenthic[["Treatment"]],
  statusatsampling = CovBenthic[["Decommphase"]],
  BACI = CovBenthic[["BACI"]],
  Site = rownames(lv_scores_s2)
)

# NOTE(review): stat_summary() summarises y within each distinct x value, so
# on continuous LV scores these layers may not draw one centroid per
# treatment -- confirm the intended output.
(s2_ordplot <- ggplot(site_scores_s2, aes(x = LV1, y = LV2, color = Treatment, group = Treatment)) +
  stat_ellipse(aes(group = Treatment), level = 0.95, linetype = 2,
               linewidth = 0.6, alpha = 0.7) +
  geom_point(alpha = 0.7, size = 2) +
  stat_summary(aes(group = Treatment), fun = mean, geom = "point",
               size = 2, stroke = 1) +
  scale_color_manual(
    name = "Treatment",
    values = c(
      "Early_Impact" = "salmon", "Later_Impact" = "darkred",
      "Early_Control" = "skyblue", "Later_Control" = "darkblue"
    ),
    labels = c(
      "Early_Impact" = "Early Impact", "Later_Impact" = "Later Impact",
      "Early_Control" = "Early Control", "Later_Control" = "Later Control"
    )
  ) +
  stat_summary(
    aes(group = Treatment), fun = mean) +
  labs(x = "Latent Variable 1", y = "Latent Variable 2") +
  theme_bw(base_size = 12) +
  # Line widths use `linewidth` (`size` for lines is deprecated since
  # ggplot2 3.4.0, and `linewidth` is already used elsewhere in the file).
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.background = element_rect(fill = "white", color = "white"),
    legend.key = element_rect(fill = "white"),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray40"),
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 10, color = "black"),
    panel.grid.major = element_line(color = "gray90", linewidth = 0.3),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", linewidth = 0.8),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white")
  ) +
  guides(color = guide_legend(nrow = 2))
)
# `filename`/`plot` are named in full rather than relying on `file`
# partially matching `filename`.
ggsave(filename = "D:/UoE/Writings/Data Chapter 2/LO/revisions/coding/s2ordplot.png",
       plot = s2_ordplot, height = 4, width = 4, dpi = 600, bg = "white")


##s2:permanova####
# Bray-Curtis dissimilarities between samples, then a PERMANOVA of Treatment
# with permutations restricted within structures (strata = TargetMMS).
dist_bray <- vegdist(SpecBenthic, method = "bray")
perm <- adonis2(
  dist_bray ~ Treatment,
  data = CovBenthic,
  permutations = 999,
  strata = CovBenthic$TargetMMS
)
perm

##s2:permanova pairwise comparisons####
# Run a separate PERMANOVA for every pair of treatments, permuting within
# structures (TargetMMS), then adjust the p-values across all pairs.
treatments <- unique(CovBenthic$Treatment)
length(treatments)
pairwise_results <- list()

# Convert the distance object to a full matrix once, not in every iteration.
dist_bray_mat <- as.matrix(dist_bray)

# seq_len() keeps the loop empty when fewer than two treatments exist
# (1:(n - 1) would count backwards and index past the end of `treatments`).
for (i in seq_len(max(length(treatments) - 1, 0))) {
  for (j in seq(i + 1, length(treatments))) {
    tr1 <- treatments[i]
    tr2 <- treatments[j]
    # Subset data for this treatment pair
    pair_idx <- CovBenthic$Treatment %in% c(tr1, tr2)
    pair_data <- CovBenthic[pair_idx, ]
    # Subset the Bray-Curtis distance matrix to match the pair data
    pair_dist <- as.dist(dist_bray_mat[pair_idx, pair_idx])
    # Run PERMANOVA
    result <- adonis2(pair_dist ~ Treatment,
                      data = pair_data,
                      strata = pair_data$TargetMMS,
                      permutations = 999)
    # Save result
    pairwise_results[[paste(tr1, "vs", tr2)]] <- result
  }
}

# Extract the first row of each result table. vapply() guarantees numeric
# vectors even when `pairwise_results` is empty.
p_values <- vapply(pairwise_results, function(x) x$`Pr(>F)`[1], numeric(1))
f_values <- vapply(pairwise_results, function(x) x$F[1], numeric(1))
r2_values <- vapply(pairwise_results, function(x) x$R2[1], numeric(1))
df_values <- vapply(pairwise_results, function(x) x$Df[1], numeric(1))
ss_values <- vapply(pairwise_results, function(x) x$SumOfSqs[1], numeric(1))

# Apply Benjamini-Hochberg p-value adjustment
adjusted_p <- p.adjust(p_values, method = "BH")

# Create comprehensive results table; significance codes are based on the
# adjusted p-values.
s2_permanova_result <- data.frame(
  Comparison = names(pairwise_results),
  Df = df_values,
  SumOfSqs = ss_values,
  F_value = round(f_values, 4),
  R2 = round(r2_values, 4),
  P_value = round(p_values, 4),
  Adj_P_value = round(adjusted_p, 4),
  Significance = ifelse(adjusted_p < 0.001, "***",
                        ifelse(adjusted_p < 0.01, "**",
                               ifelse(adjusted_p < 0.05, "*",
                                      ifelse(adjusted_p < 0.1, ".", "ns"))))
)

#betadisper(homogeneity of group dispersion pairwise test)
# Multivariate dispersion per Treatment on the Bray-Curtis distances, tested
# three ways: permutation test, parametric ANOVA, and Tukey HSD on pairs.
permutest(
  betadisper(d = dist_bray, group = CovBenthic$Treatment)
) #Signif(violate PERMANOVA assumption)
anova(
  betadisper(d = dist_bray, group = CovBenthic$Treatment)
)
TukeyHSD(
  betadisper(d = dist_bray, group = CovBenthic$Treatment)
)

##s2:pcoa####
# Two-axis PCoA (classical MDS) of the Bray-Curtis distances; site scores are
# tagged with treatment and ordination method.
dist_bray <- vegdist(SpecBenthic, method = "bray")
pcoa_res <- cmdscale(dist_bray, eig = TRUE, k = 2)
scores <- as.data.frame(pcoa_res$points)
colnames(scores) <- c("PCoA1", "PCoA2")
scores$Treatment <- CovBenthic$Treatment
scores$Method <- c("PCoA")

# Axis labels report each axis' eigenvalue as a percentage of the sum of all
# eigenvalues.
# NOTE(review): stat_summary() summarises y within each distinct x value, so
# on continuous scores these layers may not draw one centroid per treatment
# -- confirm the intended output.
(s2_pcoa <- ggplot(scores, aes(x = PCoA1, y = PCoA2, color = Treatment, group = Treatment)) +
    stat_ellipse(aes(group = Treatment), level = 0.95, linetype = 2,
                 linewidth = 0.6, alpha = 0.7) +
    geom_point(alpha = 0.7, size = 2) +
    stat_summary(aes(group = Treatment), fun = mean, geom = "point",
                 size = 2, stroke = 1) +
    scale_color_manual(
      name = "Treatment",
      values = c(
        "Early_Impact" = "salmon", "Later_Impact" = "darkred",
        "Early_Control" = "skyblue", "Later_Control" = "darkblue"
      ),
      labels = c(
        "Early_Impact" = "Early Impact", "Later_Impact" = "Later Impact",
        "Early_Control" = "Early Control", "Later_Control" = "Later Control"
      )
    ) +
    stat_summary(aes(group = Treatment), fun = mean) +
    labs(
      x = paste0("PCoA1 (", round(pcoa_res$eig[1] / sum(pcoa_res$eig) * 100, 1), "%)"),
      y = paste0("PCoA2 (", round(pcoa_res$eig[2] / sum(pcoa_res$eig) * 100, 1), "%)")
    ) +
    theme_bw(base_size = 12) +
    # Theme line widths now use `linewidth`, matching stat_ellipse() above
    # (`size` for lines is deprecated since ggplot2 3.4.0).
    theme(
      legend.position = "bottom",
      legend.title = element_blank(),
      legend.text = element_text(size = 12),
      legend.background = element_rect(fill = "white", color = "white"),
      legend.key = element_rect(fill = "white"),
      plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
      plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray40"),
      axis.title = element_text(face = "bold", size = 12),
      axis.text = element_text(size = 10, color = "black"),
      panel.grid.major = element_line(color = "gray90", linewidth = 0.3),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "black", linewidth = 0.8),
      plot.background = element_rect(fill = "white"),
      panel.background = element_rect(fill = "white")
    ) +
    guides(color = guide_legend(nrow = 2))
)

##s2:pcoa paired centroid distance####
# Observed Euclidean distance between treatment centroids, computed from the
# first two columns of `scores` (the PCoA axes).
#early impact vs early control
eiec_scores <- scores[scores$Treatment %in% c("Early_Impact", "Early_Control"), ]
eiec_centroids <- aggregate(eiec_scores[, 1:2],
                            by = list(Treatment = eiec_scores$Treatment),
                            FUN = mean)
eiec_centroids_dist <- as.matrix(dist(eiec_centroids[, 2:3], method = "euclidean"))
eiec_centroids_dist[2] # element [2, 1] of the 2 x 2 distance matrix
dist(eiec_centroids[, 2:3])[1] #0.1585529
#later impact vs later control
lilc_scores <- scores[scores$Treatment %in% c("Later_Impact", "Later_Control"), ]
lilc_centroids <- aggregate(lilc_scores[, 1:2],
                            by = list(Treatment = lilc_scores$Treatment),
                            FUN = mean)
lilc_centroids_dist <- as.matrix(dist(lilc_centroids[, 2:3], method = "euclidean"))
dist(lilc_centroids[, 2:3])[1] #0.1403631

# Bootstrapped centroid distances. `bootstrap_centroid()` is defined elsewhere
# in this file; it is presumably returning one distance per bootstrap
# replicate -- confirm against its definition.
# FIX: the quantile summaries now use the scenario 2 vectors (`eiec_dist`,
# `lilc_dist`); they previously summarised the scenario 3 objects
# `bibc_dist` / `aiac_dist` left over from copy-paste.
eiec_dist <- bootstrap_centroid(scores, "Early_Impact", "Early_Control")
eiec_dist_summary <- quantile(eiec_dist, probs = c(0.025, 0.5, 0.975))
lilc_dist <- bootstrap_centroid(scores, "Later_Impact", "Later_Control")
lilc_dist_summary <- quantile(lilc_dist, probs = c(0.025, 0.5, 0.975))

# Replicate-wise change in Impact-Control separation (later minus early).
diff_dist <- lilc_dist - eiec_dist
mean(diff_dist < 0, na.rm = TRUE) # proportion of later distance smaller than early distance

# Mean and 95% percentile interval for each comparison, for plotting.
s2_df <- data.frame(
  Comparison = c("EIEC", "LILC", "LILC - EIEC"),
  Mean = c(mean(eiec_dist), mean(lilc_dist), mean(diff_dist)),
  Lower = c(quantile(eiec_dist, 0.025), quantile(lilc_dist, 0.025), quantile(diff_dist, 0.025)),
  Upper = c(quantile(eiec_dist, 0.975), quantile(lilc_dist, 0.975), quantile(diff_dist, 0.975))
)
# Fix the x-axis order (otherwise ggplot would sort the labels alphabetically).
s2_df$Comparison <- factor(s2_df$Comparison, levels = c("EIEC", "LILC", "LILC - EIEC"))
(s2_cd <- ggplot(s2_df, aes(x = Comparison, y = Mean)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey40") +
  labs(x = NULL, y = NULL) +
  theme_bw(base_size = 14)
)



#Figure assembly####
# Cache the individual panels, then compose and export the multi-panel
# figures with patchwork (`+` places panels side by side, `/` stacks rows).
# The stray empty argument (`,,`) has been removed from every ggsave() call.
setwd("D:/UoE/Writings/Data Chapter 2/LO/revisions/coding")
save(s1_pcoa, s2_pcoa, s3_pcoa, file = "s1s2s3_pcoa.Rdata")

# NOTE(review): the scenario 3 ordination slot uses `s3_pcoa`; the scenario 3
# latent-variable plot is not assigned to an object in the visible code --
# confirm this substitution is intended.
save(s1_ordplot, s2_ordplot, s3_pcoa, s1_biospp_top, s2_biospp_top, s3_biospp_top,
     file = "s1s2s3_ordinationfigures.Rdata")
load("s1s2s3_ordinationfigures.Rdata")

# Ordination panels for the three scenarios.
(p_ord <- (s1_ordplot + s2_ordplot + s3_pcoa))
ggsave(p_ord, path = "D:/UoE/Writings/Data Chapter 2/LO/revisions/coding",
       filename = "s1s2s3ordination.png", width = 10, height = 4, dpi = 600, bg = "white")

# PCoA panels for the three scenarios.
(p_pcoa <- (s1_pcoa + s2_pcoa + s3_pcoa))
ggsave(p_pcoa, path = "D:/UoE/Writings/Data Chapter 2/LO/revisions/coding",
       filename = "s1s2s3pcoa.png", width = 10, height = 4, dpi = 600, bg = "white")

# Top bioindicator taxa for the three scenarios.
(p_spp <- (s1_biospp_top + s2_biospp_top + s3_biospp_top))
ggsave(p_spp, path = "D:/UoE/Writings/Data Chapter 2/LO/revisions/coding",
       filename = "s1s2s3biospp.png", width = 12, height = 4, dpi = 600, bg = "white")


# Combined figure: ordinations on the top row, bioindicators below, with
# panels tagged A, B, C, ...
(p_final <- (s1_ordplot + s2_ordplot + s3_pcoa) / (s1_biospp_top + s2_biospp_top + s3_biospp_top) +
   plot_annotation(tag_levels = "A"))

ggsave(p_final, path = "D:/UoE/Writings/Data Chapter 2/LO/revisions/coding",
       filename = "s1s2s3ordinationbioindicator.png", width = 14, height = 8, dpi = 600, bg = "white")

# Centroid-distance summaries for the three scenarios.
(p_cd <- s1_cd + s2_cd + s3_cd + plot_layout(guides = "collect") +
    plot_annotation(tag_levels = "A"))
ggsave(p_cd, path = "D:/UoE/Writings/Data Chapter 2/LO/revisions/coding",
       filename = "s1s2s3centroiddifference.png", width = 12, height = 4, dpi = 600, bg = "white")
