Supplemental fig2 preprocessing scripts and figure plot

192b1ccd · Matteo Barcella · ecad0b3b · 192b1ccd · 192b1ccd · 192b1ccd
Commit 192b1ccd authored Aug 04, 2025 by Matteo Barcella
--- a/Supplemental_fig2_input.rds
+++ b/Supplemental_fig2_input.rds
--- a/Supplemental_fig2_part1.R
+++ b/Supplemental_fig2_part1.R
--- a/Supplemental_fig2_part2.R
+++ b/Supplemental_fig2_part2.R
+# Identify the a shared and common effect of the culturing in celltypes as well as cell-specific modulations.
+
+library(Seurat)
+library(MAST)
+library(reshape2)
+library(ggplot2)
+library(gridExtra)
+library(pheatmap)
+library(ComplexHeatmap)
+library(RColorBrewer)
+library(openxlsx)
+library(clusterProfiler)
+library(msigdbr)
+library(enrichplot)
+
+rootdir <- "CultureEffectClassification/"
+
+# Step 1. Identify classes of genes that fall into the different patterns ####
+# Pattern UP: UPUP, nsUP, UPns [consistently UP, late UP response, early UP response]
+# Pattern DW: DWDW, nsDW, DWns [consistently DW, late DW response, early DW response]
+# Transient UP response: UPDW
+# Transient DW response: DWUP
+
+# cicle over populations and store according to categories above #
+
+populations <- c("EryProg","GMP","HSC","HSPC","MCP",
+                 "MEP-cycling_I","MEP-cycling_I", 
+                 "MEP-EryProg-I", "MEP-EryProg-II",
+                 "MkProg","MLP","MonoProg",
+                 "Erythrocytes","Prog-cycling_I","Prog-cycling_II"#,"Ribo+"
+                 )
+
+dframe_X_classes <- list()
+genes_X_classes <- list()
+
+for(popcell in populations){
+  mydata <- readRDS(paste0(rootdir,popcell,"/Pairwise_tests_significant_Classification_",popcell,"_binary.rds"))
+  mydata$Class <- "undetermined"
+  mydata$Class[which(mydata$PatternLight %in% c("UPUP","nsUP","UPns","nsnsUP"))] <- "UP"
+  mydata$Class[which(mydata$PatternLight %in% c("DWDW","nsDW","DWns","nsnsDW"))] <- "DW"
+  mydata$Class[which(mydata$PatternLight %in% c("UPDW"))] <- "trUP"
+  mydata$Class[which(mydata$PatternLight %in% c("DWUP"))] <- "trDW"
+  dframe_X_classes[[popcell]] <- split(x = mydata, f = mydata$Class)
+  for(j in names(dframe_X_classes[[popcell]])){
+    genes_X_classes[[popcell]][[j]] <- rownames(dframe_X_classes[[popcell]][[j]])
+  }
+}
+
+# do upsetR on specific patterns for all ####
+
+library(UpSetR)
+
+class_X_pop <- list()
+
+for(popcell in populations){
+  for(k in names(genes_X_classes[[popcell]])){
+    class_X_pop[[k]][[popcell]] <- genes_X_classes[[popcell]][[k]]
+  }
+}
+
+
+# create upset input and create chart ####
+
+PaperDir <- paste0(rootdir, "Paper/")
+dir.create(PaperDir)
+
+upsetr.input <- list()
+upsetr.input_string <- list()
+
+for(j in names(class_X_pop)){
+  upsetr.input[[j]] <- fromList(class_X_pop[[j]])
+  rownames(upsetr.input[[j]]) <- unique(unlist(class_X_pop[[j]]))
+  pdf(file = paste0(PaperDir, "Pattern_",j,"_UpSetR_by_Populations.pdf"), width = 18, height = 6)
+  print(upset(upsetr.input[[j]] , order.by = "freq", sets = rev(colnames(upsetr.input[[j]])), keep.order = TRUE, nintersects = NA))
+  dev.off()
+}
+
+class_X_pop_symplified <- list(DW = NULL, UP = NULL,
+                               trUP = NULL, trDW = NULL)
+
+for(k in names(class_X_pop_symplified)){
+  class_X_pop_symplified[[k]]$MyeLineage <- c(class_X_pop[[k]]$GMP)
+  class_X_pop_symplified[[k]]$MonoProg <- class_X_pop[[k]]$MonoProg
+  
+  class_X_pop_symplified[[k]]$MCP <- class_X_pop[[k]]$MCP
+  class_X_pop_symplified[[k]]$MkEryLineage <- c(class_X_pop[[k]]$EryProg, class_X_pop[[k]]$`MEP-cycling_I`, 
+                                       class_X_pop[[k]]$`MEP-EryProg-I`, class_X_pop[[k]]$`MEP-EryProg-II`, 
+                                       class_X_pop[[k]]$MkProg, class_X_pop[[k]]$Erythrocytes)
+  class_X_pop_symplified[[k]]$ProgCycling <- c(class_X_pop[[k]]$`Prog-cycling_I`, 
+                                               class_X_pop[[k]]$`Prog-cycling_II`)
+  class_X_pop_symplified[[k]]$HSC <- c(class_X_pop[[k]]$HSC, class_X_pop[[k]]$MLP)
+  class_X_pop_symplified[[k]]$HSPC <- c(class_X_pop[[k]]$HSPC)
+}
+
+upsetr.input.simple <- list()
+upsetr.input.simple_string <- list()
+
+colpalette <- c("#A6CEE3","#B2DF8A","#FDBF6F","#E78AC3","#FFFF99","#E31A1C","darkgrey")
+Labelset <- c("HSC","HSPC","MonoProg","MyeLineage","MCP","MkEryLineage","ProgCycling")
+names(Labelset) <- colpalette
+
+for(j in names(class_X_pop_symplified)){
+  upsetr.input.simple[[j]] <- fromList(class_X_pop_symplified[[j]])
+  rownames(upsetr.input.simple[[j]]) <- unique(unlist(class_X_pop_symplified[[j]]))
+   
+  myset <- Labelset[Labelset %in% colnames(upsetr.input.simple[[j]])]
+  
+  pdf(file = paste0(PaperDir, "Pattern_",j,"_UpSetR_by_Populations_simply.pdf"), width = 18, height = 6)
+  print(upset(upsetr.input.simple[[j]] , order.by = "freq", sets.bar.color = names(myset),
+              sets = myset,
+              keep.order = TRUE, nintersects = NA))
+  dev.off()
+}
+
+
+### Which are the common pathways that are modulated across celltypes upon culturing #####
+
+for(j in c("DW","UP")){
+  tmpdata <- upsetr.input.simple[[j]]
+  tmpdata_shared <- tmpdata[rowSums(tmpdata) > 3,]
+  pattern_data <- apply( tmpdata[ , colnames(tmpdata) ] , 1 , 
+                                                 paste , collapse = "")
+  upsetr.input.simple_string[[j]][["shared"]] <- pattern_data[rownames(tmpdata_shared)]
+  
+  for (k in colnames(tmpdata)) {
+    test <- "0000000"
+    substr(x = test , start = which(colnames(tmpdata) == k), stop = which(colnames(tmpdata) == k)) <- "1"
+    print(test)
+    upsetr.input.simple_string[[j]][[k]] <- pattern_data[pattern_data == test]
+  }
+  
+  ### detailed version 
+  
+  tmpdata <- upsetr.input[[j]]
+  tmpdata_shared <- tmpdata[rowSums(tmpdata) > 9,]
+  pattern_data <- apply( tmpdata[ , colnames(tmpdata) ] , 1 , 
+                         paste , collapse = "")
+  upsetr.input_string[[j]][["shared"]] <- pattern_data[rownames(tmpdata_shared)]
+  
+  for (k in colnames(tmpdata)) {
+    test <- "00000000000000"
+    substr(x = test , start = which(colnames(tmpdata) == k), stop = which(colnames(tmpdata) == k)) <- "1"
+    print(test)
+    upsetr.input_string[[j]][[k]] <- pattern_data[pattern_data == test]
+  }
+}
+
+#### Perform enrichment #####
+
+# create universes for combined simple labels ####
+
+universes <- list()
+universes_symplified <- list()
+
+for(m in colnames(upsetr.input$DW)){
+  universes[[m]] <- readLines(paste0(rootdir, "enrichment/",m,"_universe.txt"))
+}
+
+universes_symplified$MyeLineage <- unique(universes$GMP)
+universes_symplified$MonoProg <- unique(universes$MonoProg)
+universes_symplified$MkEryLineage <- unique(c(universes$EryProg, universes$`MEP-cycling_I`, 
+                                     universes$`MEP-EryProg-I`, universes$`MEP-EryProg-II`, 
+                                     universes$MkProg, universes$Erythrocytes))
+universes_symplified$ProgCycling <- unique(c(universes$`Prog-cycling_I`, 
+                                     universes$`Prog-cycling_II`))
+universes_symplified$HSC <- unique(c(universes$HSC, universes$MLP))
+universes_symplified$HSPC <- unique(c(universes$HSPC))
+universes_symplified$MCP <- unique(c(universes$MCP))
+universes_symplified$shared <- unique(unlist(universes_symplified))
+
+universes$shared <- unique(unlist(universes))
+
+# Perform enrichment #####
+
+enrichmentdir <- "PatternsEnrichments/"
+dir.create(enrichmentdir) 
+m_t2g <- read.gmt(gmtfile = "h.all.v2023.1.Hs.symbols.gmt")
+
+#### Perform enrichment #####
+
+enrichobj <- list()
+upsetr.input.simple_names <- list()
+for(j in names(upsetr.input.simple_string)){
+  for(i in names(upsetr.input.simple_string[[j]])){
+    upsetr.input.simple_names[[j]][[i]] <- names(upsetr.input.simple_string[[j]][[i]])
+  }
+}
+
+
+for(a in c("DW","UP")){
+  xx <- compareCluster(upsetr.input.simple_names[[a]], fun="enrichGO", pvalueCutoff=0.05,
+                       OrgDb='org.Hs.eg.db', keyType = "SYMBOL", universe = universes_symplified[["shared"]], 
+                       ont = "BP")
+  saveRDS(xx, file = paste0(enrichmentdir, "Pattern_enrichment_simplified_BP_",a,".rds"))
+  # if(nrow(xx@compareClusterResult) < 2){
+  #   next
+  # }
+  xx <- pairwise_termsim(xx)                     
+  pdf(file = paste0(enrichmentdir, "Pattern_enrichment_simplified_BP_",a,".pdf"), width = 16, height = 12, paper = "a4r")
+  goplot <- emapplot(xx, pie="count", cex_category=1, cex_line = 0.4, layout="kk")
+  print(goplot)
+  dev.off()
+  
+  for(j in names(universes_symplified)){
+  enrichobj[[j]][[a]] <- enricher(names(upsetr.input.simple_string[[a]][[j]]), TERM2GENE=m_t2g,
+                               universe = universes_symplified[[j]], pvalueCutoff = 0.05)
+  }
+}
+
+saveRDS(enrichobj,"Paper/Hallmarks_enrichments_patternSimple_populations.rds")
+saveRDS(upsetr.input_string, file = "Paper/upsetr.input_string.rds")
+saveRDS(upsetr.input.simple_string, file = "Paper/upsetr.input.simple_string.rds")
+
+## Creating common plot - HALLMARKS ####
+
+for(i in names(enrichobj)){
+  for(k in names(enrichobj[[i]])){
+    enrichobj[[i]][[k]]@result$Pop <- i
+    enrichobj[[i]][[k]]@result$Direction <- k
+    enrichobj[[i]][[k]]@result$ID <- gsub(x = enrichobj[[i]][[k]]@result$ID, pattern = "HALLMARK_", replacement = "")
+    enrichobj[[i]][[k]]@result$Description <- gsub(x = enrichobj[[i]][[k]]@result$Description, pattern = "HALLMARK_", replacement = "")
+    rownames(enrichobj[[i]][[k]]@result) <- gsub(x = rownames(enrichobj[[i]][[k]]@result), pattern = "HALLMARK_", replacement = "")
+  }
+}
+
+df <- data.frame()
+
+for(i in names(enrichobj)){
+  for(k in names(enrichobj[[i]])){
+    df <- rbind.data.frame(df, enrichobj[[i]][[k]]@result)
+  }
+}
+
+df_binarized <- df
+df_binarized$TileVal <- 0
+df_binarized$TileVal[which(df_binarized$p.adjust < 0.05 & df_binarized$Direction == "UP")] <- 1
+df_binarized$TileVal[which(df_binarized$p.adjust < 0.05 & df_binarized$Direction == "DW")] <- -1
+
+library(reshape2)
+df_binarized <- df_binarized[df_binarized$TileVal != 0,]
+df_binarized_mt <- dcast(data = df_binarized, formula = ID ~ Pop + Direction, value.var = "TileVal")
+df_binarized_mt[is.na(df_binarized_mt)] <- 0
+rownames(df_binarized_mt) <- df_binarized_mt$ID
+df_binarized_mt$ID <- NULL
+
+png(filename = "Paper/Hallmarks_enrichments_pheatmap.png", width = 12, height = 6, units = "in", res = 300)
+pheatmap(df_binarized_mt)
+dev.off()
+
+library(ggplot2)
+png(filename = "Paper/Hallmarks_enrichments_tileplot.png", width = 12, height = 6, units = "in", res = 300)
+ggplot(data = df_binarized, mapping = aes(x = Direction, y = ID, fill = TileVal)) + geom_tile() +
+  facet_grid(. ~ Pop, scales = "free", space = "free") + 
+  scale_fill_distiller(palette = "RdBu", direction = -1)
+dev.off()
+
+saveRDS(object = df_binarized_mt, "Paper/Hallmarks_enrichments_pheatmap_data.rds")
+saveRDS(object = df_binarized, "Paper/Hallmarks_enrichments_tileplot_data.rds")
+
+library(enrichplot)
+
+UP_PLOT <- cnetplot(enrichobj$shared$UP, showCategory = 20, cex_label_gene = 0.6,
+         cex_label_category = 1, color_category = "#E31A1C", ) + 
+  ggtitle("Enrichment from UP genes") + 
+  #ggtitle("Enriched hallmarks from UP genes across culturing") + 
+  theme(plot.title = element_text(hjust = 0.5), 
+        legend.position = "bottom")
+
+DW_PLOT <- cnetplot(enrichobj$shared$DW, showCategory = 20, cex_label_gene = 0.6,
+                    cex_label_category = 1,  color_category = "#1F78B4") +
+  ggtitle("Enrichment from DW genes") + 
+  #ggtitle("Enriched hallmarks from DW genes across culturing") + 
+  theme(plot.title = element_text(hjust = 0.5), 
+        legend.position = "bottom")
+
+library(gridExtra)
+
+png(filename = "Paper/Hallmark_enriched_terms_common_UP_DW_modulated_genes_across_populations.png", 
+    width = 24, height = 12, units = "in", res = 300)
+grid.arrange(UP_PLOT, DW_PLOT, ncol = 2, widths= c(0.5, 0.5))
+dev.off()
+
+png(filename = "Paper/Hallmark_enriched_terms_common_UP_modulated_genes_across_populations.png", 
+    width = 12, height = 12, units = "in", res = 300)
+UP_PLOT + theme(plot.title = element_blank())
+dev.off()
+
+png(filename = "Paper/Hallmark_enriched_terms_common_DW_modulated_genes_across_populations.png", 
+    width = 12, height = 12, units = "in", res = 300)
+DW_PLOT + theme(plot.title = element_blank())
+dev.off()
+
+###
+
+png(filename = "Paper/Hallmark_enriched_terms_common_UP_DW_modulated_genes_across_populations_B.png", 
+    width = 12, height = 9, units = "in", res = 300)
+grid.arrange(UP_PLOT, DW_PLOT, ncol = 2, widths= c(0.65, 0.35))
+dev.off()
+
+pdf(file = "Paper/Hallmark_enriched_terms_common_UP_DW_modulated_genes_across_populations.pdf", 
+       width = 12, height = 9)
+gridExtra::grid.arrange(UP_PLOT, DW_PLOT, ncol = 2, widths= c(0.65, 0.35))
+dev.off()