library(Seurat)
library(dplyr)
library(openxlsx)
library(clusterProfiler)
library(ReactomePA)
library(org.Mm.eg.db)
library(dplyr)
library(stringr)
library(ComplexHeatmap)
library(RColorBrewer)
library(grid)
library(scales)
library(ggrepel)
library(cowplot)
library(gridExtra)
library(circlize)

plotVolcano <- function(dge_res, adj.pval.thr, logfc.thr) {
    data <- data.frame(gene = row.names(dge_res), pval = -log10(dge_res$p_val_adj), lfc = dge_res$avg_logFC)
    data <- na.omit(data)
    data <- mutate(data, color = case_when(data$lfc > logfc.thr & data$pval > -log10(adj.pval.thr) ~ "Overexpressed",
                                           data$lfc < -logfc.thr & data$pval > -log10(adj.pval.thr) ~ "Underexpressed",
                                           abs(data$lfc) < logfc.thr | data$pval < -log10(adj.pval.thr) ~ "NonSignificant"))
    data <- data[order(data$pval, decreasing = TRUE),]
    highl <- head(subset(data, color != "NonSignificant"), 20)
    vol <- ggplot(data, aes(x = lfc, y = pval, colour = color)) +
        theme_bw(base_size = 12) +
        theme(legend.position = "right") +
        xlab("log2 Fold Change") + 
        ylab(expression(-log[10]("adjusted p-value"))) +
        geom_hline(yintercept = -log10(adj.pval.thr), colour = "darkgray") +
        geom_point(size = 2, alpha = 0.8, na.rm = T) +
        scale_color_manual(name = "Expression",
                           values = c(Overexpressed = "#CD4F39",
                                      Underexpressed = "#008B00",
                                      NonSignificant = "darkgray")) +
        geom_label_repel(data = highl, aes(label = gene), show.legend = FALSE)
    return(vol)
}


wdir <- "Birocchi_SciTraMed2022/results"
macro_obj <- readRDS(paste(wdir, "MacrophagesFiltered", "01-fastMNN_2", "MacrophagesFiltered_fastMNN_final.rds", sep = "/"))

md <- macro_obj@meta.data
md <- mutate(md, RNA_Treatment = ifelse(md$RNA_Group == "AB_Ctrl", "Control", "GeneTherapy"))
rownames(md) <- colnames(macro_obj)
head(md)
treat <- md$RNA_Treatment
names(treat) <- rownames(md)

macro_obj <- AddMetaData(object = macro_obj, metadata = treat, col.name = "RNA_Treatment")

Idents(macro_obj) <- "RNA_Treatment"
treat_dge <- FindMarkers(object = macro_obj, ident.1 = "GeneTherapy", ident.2 = "Control", logfc.threshold = 0, min.pct = 0)

head(treat_dge)

write.xlsx(x = list("GeneTherapy-Control" = treat_dge),
           file = paste(wdir, "MacrophagesFiltered", "02-fastMNN_2-post-analysis", "GeneTherapy-Control-DGE.xlsx", sep = "/"), row.names = T)

adj.pval.thr <- 1e-06
logfc.thr <- 0
vol <- plotVolcano(treat_dge, adj.pval.thr, logfc.thr)
ggsave(filename = paste(wdir, "MacrophagesFiltered", "02-fastMNN_2-post-analysis", "GeneTherapy-Control-volcano.png", sep = "/"),
       plot = vol, width = 10, height = 9, dpi = 200)


org_db <- org.Mm.eg.db
gmt_obj <- read.gmt("h.all.v7.1.symbols_mouse.gmt")
c7_gmt <- read.gmt("c7.all.v7.1.symbols_mouse.gmt")
pval_thr <- 0.05

out_dir <- paste(wdir, "MacrophagesFiltered", "02-fastMNN_2-post-analysis", sep = "/")
contr <- "GeneTherapy-Control"
dge_res_con <- treat_dge
dge_res_con$symbol <- rownames(dge_res_con)
dge_res_con$entrez <- mapIds(x = org_db,
                             keys = dge_res_con$symbol,
                             column = "ENTREZID",
                             keytype = "SYMBOL",
                             multiVals = "first")
head(dge_res_con)
# Sort by pval
gene.res.logfc <- as.vector(dge_res_con$avg_logFC)
names(gene.res.logfc) <- dge_res_con$symbol
gene.res.logfc <- sort(gene.res.logfc, decreasing = TRUE)
gene.res.logfc <- gene.res.logfc[!duplicated(names(gene.res.logfc))]
print(gene.res.logfc[1:10])
gene.res.logfc.entrez <- as.vector(dge_res_con$avg_logFC)
names(gene.res.logfc.entrez) <- dge_res_con$entrez
gene.res.logfc.entrez <- sort(gene.res.logfc.entrez, decreasing = TRUE)
gene.res.logfc.entrez <- gene.res.logfc.entrez[!duplicated(names(gene.res.logfc.entrez))]
print(gene.res.logfc.entrez[1:10])

# Top genes
top.genes <- subset(dge_res_con, p_val_adj < 0.05)
top.genes.names <- as.character(top.genes$symbol)
print(top.genes.names[1:10])
top.genes.entrez <- as.character(top.genes$entrez)
print(top.genes.entrez[1:10])
# Top genes pos
top.genes.pos <- subset(dge_res_con, p_val_adj < 0.05 & avg_logFC > 0)
top.genes.pos.names <- as.character(top.genes.pos$symbol)
print(top.genes.pos.names[1:10])
top.genes.pos.entrez <- as.character(top.genes.pos$entrez)
print(top.genes.pos.entrez[1:10])
# Top genes neg
top.genes.neg <- subset(dge_res_con, p_val_adj < 0.05 & avg_logFC < 0)
top.genes.neg.names <- as.character(top.genes.neg$symbol)
print(top.genes.neg.names[1:10])
top.genes.neg.entrez <- as.character(top.genes.neg$entrez)
print(top.genes.neg.entrez[1:10])

##################
### Enrichment ###
##################
enr_res <- list()
## enrich GO
contr.enrich_go <- enrichGO(gene = top.genes.names,
                            universe = as.character(dge_res_con$symbol),
                            OrgDb = org_db,
                            ont = "BP",
                            pAdjustMethod = "BH",
                            pvalueCutoff = pval_thr,
                            keyType = 'SYMBOL')
enr_res[["enr_GO_BP"]] <- contr.enrich_go
## enrich KEGG
contr.enrich_kegg <- enrichKEGG(gene = top.genes.entrez,
                                organism = "mmu",
                                universe = as.character(dge_res_con$entrez),
                                pvalueCutoff = pval_thr)
contr.enrich_kegg.symb <- setReadable(contr.enrich_kegg, org_db, keyType = "ENTREZID")
enr_res[["enr_KEGG"]] <- contr.enrich_kegg.symb
## enrich Pathway
contr.enrich_path <- enrichPathway(gene = top.genes.entrez,
                                   organism = "mouse",
                                   universe = as.character(dge_res_con$entrez),
                                   pvalueCutoff = pval_thr)
contr.enrich_path.symb <- setReadable(contr.enrich_path, org_db, keyType = "ENTREZID")
enr_res[["enr_Pathway"]] <- contr.enrich_path.symb
## Enrich
contr.hallmark <- enricher(top.genes.names,
                           universe = as.character(dge_res_con$symbol),
                           pvalueCutoff = pval_thr,
                           TERM2GENE = gmt_obj)
enr_res[["enr_HALLMARK"]] <- contr.hallmark
contr.c7 <- enricher(top.genes.names,
                     universe = as.character(dge_res_con$symbol),
                     pvalueCutoff = pval_thr,
                     TERM2GENE = c7_gmt)
enr_res[["enr_C7"]] <- contr.hallmark
write.xlsx(x = enr_res, file = paste(out_dir, paste0(contr, "_enrichment.xlsx"), sep = "/"))


######################
### Enrichment Pos ###
######################
enr_res_pos <- list()
## enrich GO
contr.enrich_go.pos <- enrichGO(gene = top.genes.pos.names,
                            universe = as.character(dge_res_con$symbol),
                            OrgDb = org_db,
                            ont = "BP",
                            pAdjustMethod = "BH",
                            pvalueCutoff = pval_thr,
                            keyType = 'SYMBOL')
enr_res_pos[["enr_pos_GO_BP"]] <- contr.enrich_go.pos
## enrich KEGG
contr.enrich_kegg_pos <- enrichKEGG(gene = top.genes.pos.entrez,
                                organism = "mmu",
                                universe = as.character(dge_res_con$entrez),
                                pvalueCutoff = pval_thr)
contr.enrich_kegg_pos.symb <- setReadable(contr.enrich_kegg_pos, org_db, keyType = "ENTREZID")
enr_res_pos[["enr_pos_KEGG"]] <- contr.enrich_kegg_pos.symb
## enrich Pathway
contr.enrich_path_pos <- enrichPathway(gene = top.genes.pos.entrez,
                                   organism = "mouse",
                                   universe = as.character(dge_res_con$entrez),
                                   pvalueCutoff = pval_thr)
contr.enrich_path_pos.symb <- setReadable(contr.enrich_path_pos, org_db, keyType = "ENTREZID")
enr_res_pos[["enr_pos_Pathway"]] <- contr.enrich_path_pos.symb
## Enrich
contr.hallmark_pos <- enricher(top.genes.pos.names,
                           universe = as.character(dge_res_con$symbol),
                           pvalueCutoff = pval_thr,
                           TERM2GENE = gmt_obj)
enr_res_pos[["enr_pos_HALLMARK"]] <- contr.hallmark_pos
contr.c7_pos <- enricher(top.genes.pos.names,
                         universe = as.character(dge_res_con$symbol),
                         pvalueCutoff = pval_thr,
                         TERM2GENE = c7_gmt)
enr_res_pos[["enr_pos_C7"]] <- contr.c7_pos
write.xlsx(x = enr_res_pos, file = paste(out_dir, paste0(contr, "_enrichment_pos.xlsx"), sep = "/"))

######################
### Enrichment Neg ###
######################
enr_res_neg <- list()
## enrich GO
contr.enrich_go.neg <- enrichGO(gene = top.genes.neg.names,
                                universe = as.character(dge_res_con$symbol),
                                OrgDb = org_db,
                                ont = "BP",
                                pAdjustMethod = "BH",
                                pvalueCutoff = pval_thr,
                                keyType = 'SYMBOL')
enr_res_neg[["enr_neg_GO_BP"]] <- contr.enrich_go.neg
## enrich KEGG
contr.enrich_kegg_neg <- enrichKEGG(gene = top.genes.neg.entrez,
                                    organism = "mmu",
                                    universe = as.character(dge_res_con$entrez),
                                    pvalueCutoff = pval_thr)
contr.enrich_kegg_neg.symb <- setReadable(contr.enrich_kegg_neg, org_db, keyType = "ENTREZID")
enr_res_neg[["enr_neg_KEGG"]] <- contr.enrich_kegg_neg.symb
## enrich Pathway
contr.enrich_path_neg <- enrichPathway(gene = top.genes.neg.entrez,
                                       organism = "mouse",
                                       universe = as.character(dge_res_con$entrez),
                                       pvalueCutoff = pval_thr)
contr.enrich_path_neg.symb <- setReadable(contr.enrich_path_neg, org_db, keyType = "ENTREZID")
enr_res_neg[["enr_neg_Pathway"]] <- contr.enrich_path_neg.symb
## Enrich
contr.hallmark_neg <- enricher(top.genes.neg.names,
                               universe = as.character(dge_res_con$symbol),
                               pvalueCutoff = pval_thr,
                               TERM2GENE = gmt_obj)
enr_res_neg[["enr_neg_HALLMARK"]] <- contr.hallmark_neg
contr.c7_neg <- enricher(top.genes.neg.names,
                         universe = as.character(dge_res_con$symbol),
                         pvalueCutoff = pval_thr,
                         TERM2GENE = c7_gmt)
enr_res_neg[["enr_neg_C7"]] <- contr.c7_neg
write.xlsx(x = enr_res_neg, file = paste(out_dir, paste0(contr, "_enrichment_neg.xlsx"), sep = "/"))


############
### GSEA ###
############
gsea_res <- list()
## gsea GO
cat("# gsea GO", "\n")
contr.gsea_go <- gseGO(geneList = gene.res.logfc.entrez,
                       OrgDb = org_db,
                       ont = "BP",
                       nPerm = 10000,
                       minGSSize = 100,
                       maxGSSize = 500,
                       pvalueCutoff = pval_thr,
                       verbose = FALSE)
contr.gsea_go.symb <- setReadable(contr.gsea_go, org_db, keyType = "auto")
gsea_res[["gsea_GO_BP"]] <- contr.gsea_go.symb
## GSEA KEGG
cat("# gsea KEGG", "\n")
contr.gsea_kegg <- gseKEGG(geneList = gene.res.logfc.entrez,
                           organism = "mmu",
                           nPerm = 10000,
                           minGSSize = 120,
                           pvalueCutoff = pval_thr,
                           verbose = FALSE)
contr.gsea_kegg.symb <- setReadable(contr.gsea_kegg, org_db, keyType = "ENTREZID")
gsea_res[["gsea_KEGG"]] <- contr.gsea_kegg.symb
## GSEA Pathway
cat("# gsea Pathway", "\n")
contr.gsea_path <- gsePathway(geneList = gene.res.logfc.entrez,
                              organism = "mouse",
                              nPerm = 10000,
                              minGSSize = 120,
                              pvalueCutoff = pval_thr,
                              verbose = FALSE)
contr.gsea_path.symb <- setReadable(contr.gsea_path, org_db, keyType = "ENTREZID")
gsea_res[["gsea_Pathway"]] <- contr.gsea_path.symb
## GSEA HALLMARK
contr.enricher.gsea <- GSEA(gene.res.logfc, TERM2GENE = gmt_obj, nPerm = 10000, pvalueCutoff = pval_thr)
gsea_res[["gsea_HALLMARK"]] <- contr.enricher.gsea
contr.enricher.gsea.c7 <- GSEA(gene.res.logfc, TERM2GENE = c7_gmt, nPerm = 10000, pvalueCutoff = pval_thr)
gsea_res[["gsea_C7"]] <- contr.enricher.gsea.c7
write.xlsx(x = gsea_res, file = paste(out_dir, paste0(contr, "_GSEA.xlsx"), sep = "/"))

#######################################################################################
# No cluster 6
wdir <- "Birocchi_SciTraMed2022/results"
macro_obj <- readRDS(paste(wdir, "MacrophagesFiltered", "01-fastMNN_2", "MacrophagesFiltered_fastMNN_final.rds", sep = "/"))

clu6_mark <- subset(macro_obj@misc$markers_full$RNA_snn_res.0.6, cluster == "6")
nrow(clu6_mark)
head(clu6_mark)

org_db <- org.Mm.eg.db
gmt_obj <- read.gmt("h.all.v7.1.symbols_mouse.gmt")
pval_thr <- 0.05

out_dir <- paste(wdir, "MacrophagesFiltered", "02-fastMNN_2-post-analysis", sep = "/")
contr <- "Clu6Markers"
dge_res_con <- clu6_mark
dge_res_con$entrez <- mapIds(x = org_db,
                             keys = dge_res_con$gene,
                             column = "ENTREZID",
                             keytype = "SYMBOL",
                             multiVals = "first")
head(dge_res_con)
# Sort by pval
gene.res.logfc <- as.vector(dge_res_con$avg_logFC)
names(gene.res.logfc) <- dge_res_con$gene
gene.res.logfc <- sort(gene.res.logfc, decreasing = TRUE)
gene.res.logfc <- gene.res.logfc[!duplicated(names(gene.res.logfc))]
print(gene.res.logfc[1:10])
gene.res.logfc.entrez <- as.vector(dge_res_con$avg_logFC)
names(gene.res.logfc.entrez) <- dge_res_con$entrez
gene.res.logfc.entrez <- sort(gene.res.logfc.entrez, decreasing = TRUE)
gene.res.logfc.entrez <- gene.res.logfc.entrez[!duplicated(names(gene.res.logfc.entrez))]
print(gene.res.logfc.entrez[1:10])

# Top genes
top.genes <- subset(dge_res_con, p_val_adj < 1e-06)
top.genes.names <- as.character(top.genes$gene)
print(top.genes.names[1:10])
top.genes.entrez <- as.character(top.genes$entrez)
print(top.genes.entrez[1:10])
# Top genes pos
top.genes.pos <- subset(dge_res_con, p_val_adj < 1e-06 & avg_logFC > 0)
top.genes.pos.names <- as.character(top.genes.pos$gene)
print(top.genes.pos.names[1:10])
top.genes.pos.entrez <- as.character(top.genes.pos$entrez)
print(top.genes.pos.entrez[1:10])
# Top genes neg
top.genes.neg <- subset(dge_res_con, p_val_adj < 1e-06 & avg_logFC < 0)
top.genes.neg.names <- as.character(top.genes.neg$gene)
print(top.genes.neg.names[1:10])
top.genes.neg.entrez <- as.character(top.genes.neg$entrez)
print(top.genes.neg.entrez[1:10])

##################
### Enrichment ###
##################
enr_res <- list()
## enrich GO
contr.enrich_go <- enrichGO(gene = top.genes.names,
                            universe = as.character(dge_res_con$gene),
                            OrgDb = org_db,
                            ont = "BP",
                            pAdjustMethod = "BH",
                            pvalueCutoff = pval_thr,
                            keyType = 'SYMBOL')
enr_res[["enr_GO_BP"]] <- contr.enrich_go
## enrich KEGG
contr.enrich_kegg <- enrichKEGG(gene = top.genes.entrez,
                                organism = "mmu",
                                universe = as.character(dge_res_con$entrez),
                                pvalueCutoff = pval_thr)
contr.enrich_kegg.symb <- setReadable(contr.enrich_kegg, org_db, keyType = "ENTREZID")
enr_res[["enr_KEGG"]] <- contr.enrich_kegg.symb
## enrich Pathway
contr.enrich_path <- enrichPathway(gene = top.genes.entrez,
                                   organism = "mouse",
                                   universe = as.character(dge_res_con$entrez),
                                   pvalueCutoff = pval_thr)
contr.enrich_path.symb <- setReadable(contr.enrich_path, org_db, keyType = "ENTREZID")
enr_res[["enr_Pathway"]] <- contr.enrich_path.symb
## Enrich
contr.hallmark <- enricher(top.genes.names,
                           universe = as.character(dge_res_con$gene),
                           pvalueCutoff = pval_thr,
                           TERM2GENE = gmt_obj)
enr_res[["enr_HALLMARK"]] <- contr.hallmark
write.xlsx(x = enr_res, file = paste(out_dir, paste0(contr, "_enrichment.xlsx"), sep = "/"))


######################
### Enrichment Pos ###
######################
enr_res_pos <- list()
## enrich GO
contr.enrich_go.pos <- enrichGO(gene = top.genes.pos.names,
                                universe = as.character(dge_res_con$gene),
                                OrgDb = org_db,
                                ont = "BP",
                                pAdjustMethod = "BH",
                                pvalueCutoff = pval_thr,
                                keyType = 'SYMBOL')
enr_res_pos[["enr_pos_GO_BP"]] <- contr.enrich_go.pos
## enrich KEGG
contr.enrich_kegg_pos <- enrichKEGG(gene = top.genes.pos.entrez,
                                    organism = "mmu",
                                    universe = as.character(dge_res_con$entrez),
                                    pvalueCutoff = pval_thr)
contr.enrich_kegg_pos.symb <- setReadable(contr.enrich_kegg_pos, org_db, keyType = "ENTREZID")
enr_res_pos[["enr_pos_KEGG"]] <- contr.enrich_kegg_pos.symb
## enrich Pathway
contr.enrich_path_pos <- enrichPathway(gene = top.genes.pos.entrez,
                                       organism = "mouse",
                                       universe = as.character(dge_res_con$entrez),
                                       pvalueCutoff = pval_thr)
contr.enrich_path_pos.symb <- setReadable(contr.enrich_path_pos, org_db, keyType = "ENTREZID")
enr_res_pos[["enr_pos_Pathway"]] <- contr.enrich_path_pos.symb
## Enrich
contr.hallmark_pos <- enricher(top.genes.pos.names,
                               universe = as.character(dge_res_con$gene),
                               pvalueCutoff = pval_thr,
                               TERM2GENE = gmt_obj)
enr_res_pos[["enr_pos_HALLMARK"]] <- contr.hallmark_pos
write.xlsx(x = enr_res_pos, file = paste(out_dir, paste0(contr, "_enrichment_pos.xlsx"), sep = "/"))

######################
### Enrichment Neg ###
######################
enr_res_neg <- list()
## enrich GO
contr.enrich_go.neg <- enrichGO(gene = top.genes.neg.names,
                                universe = as.character(dge_res_con$gene),
                                OrgDb = org_db,
                                ont = "BP",
                                pAdjustMethod = "BH",
                                pvalueCutoff = pval_thr,
                                keyType = 'SYMBOL')
enr_res_neg[["enr_neg_GO_BP"]] <- contr.enrich_go.neg
## enrich KEGG
contr.enrich_kegg_neg <- enrichKEGG(gene = top.genes.neg.entrez,
                                    organism = "mmu",
                                    universe = as.character(dge_res_con$entrez),
                                    pvalueCutoff = pval_thr)
contr.enrich_kegg_neg.symb <- setReadable(contr.enrich_kegg_neg, org_db, keyType = "ENTREZID")
enr_res_neg[["enr_neg_KEGG"]] <- contr.enrich_kegg_neg.symb
## enrich Pathway
contr.enrich_path_neg <- enrichPathway(gene = top.genes.neg.entrez,
                                       organism = "mouse",
                                       universe = as.character(dge_res_con$entrez),
                                       pvalueCutoff = pval_thr)
contr.enrich_path_neg.symb <- setReadable(contr.enrich_path_neg, org_db, keyType = "ENTREZID")
enr_res_neg[["enr_neg_Pathway"]] <- contr.enrich_path_neg.symb
## Enrich
contr.hallmark_neg <- enricher(top.genes.neg.names,
                               universe = as.character(dge_res_con$gene),
                               pvalueCutoff = pval_thr,
                               TERM2GENE = gmt_obj)
enr_res_neg[["enr_neg_HALLMARK"]] <- contr.hallmark_neg
write.xlsx(x = enr_res_neg, file = paste(out_dir, paste0(contr, "_enrichment_neg.xlsx"), sep = "/"))


############
### GSEA ###
############
gsea_res <- list()
## gsea GO
cat("# gsea GO", "\n")
contr.gsea_go <- gseGO(geneList = gene.res.logfc.entrez,
                       OrgDb = org_db,
                       ont = "BP",
                       nPerm = 10000,
                       minGSSize = 100,
                       maxGSSize = 500,
                       pvalueCutoff = pval_thr,
                       verbose = FALSE)
contr.gsea_go.symb <- setReadable(contr.gsea_go, org_db, keyType = "auto")
gsea_res[["gsea_GO_BP"]] <- contr.gsea_go.symb
## GSEA KEGG
cat("# gsea KEGG", "\n")
contr.gsea_kegg <- gseKEGG(geneList = gene.res.logfc.entrez,
                           organism = "mmu",
                           nPerm = 10000,
                           minGSSize = 120,
                           pvalueCutoff = pval_thr,
                           verbose = FALSE)
contr.gsea_kegg.symb <- setReadable(contr.gsea_kegg, org_db, keyType = "ENTREZID")
gsea_res[["gsea_KEGG"]] <- contr.gsea_kegg.symb
## GSEA Pathway
cat("# gsea Pathway", "\n")
contr.gsea_path <- gsePathway(geneList = gene.res.logfc.entrez,
                              organism = "mouse",
                              nPerm = 10000,
                              minGSSize = 120,
                              pvalueCutoff = pval_thr,
                              verbose = FALSE)
contr.gsea_path.symb <- setReadable(contr.gsea_path, org_db, keyType = "ENTREZID")
gsea_res[["gsea_Pathway"]] <- contr.gsea_path.symb
## GSEA HALLMARK
contr.enricher.gsea <- GSEA(gene.res.logfc, TERM2GENE = gmt_obj, nPerm = 10000, pvalueCutoff = pval_thr)
gsea_res[["gsea_HALLMARK"]] <- contr.enricher.gsea
write.xlsx(x = gsea_res, file = paste(out_dir, paste0(contr, "_GSEA.xlsx"), sep = "/"))
