Supplemental fig2 preprocessing scripts and figure plot

192b1ccd · Matteo Barcella · ecad0b3b · 192b1ccd · 192b1ccd · 192b1ccd
Commit 192b1ccd authored Aug 04, 2025 by Matteo Barcella
--- a/Supplemental_fig2_input.rds
+++ b/Supplemental_fig2_input.rds
--- a/Supplemental_fig2_part1.R
+++ b/Supplemental_fig2_part1.R
+# Evaluating the effect of culturing across time
+
+library(Seurat)
+library(MAST)
+library(reshape2)
+library(ggplot2)
+library(gridExtra)
+library(pheatmap)
+library(ComplexHeatmap)
+library(RColorBrewer)
+library(openxlsx)
+library(clusterProfiler)
+library(msigdbr)
+library(enrichplot)
+
+# load object
+
+obj <- readRDS(file = "Fig2C_Dataset.rds")
+
+obj <- SetIdent(obj, value = "orig.ident")
+obj <- subset(obj, idents = c("C02","C03","C04"))
+
+culture_markers <- list()
+
+for(pop in unique(obj$Classification)){
+  dir.create(pop)
+  tmp_obj <- SetIdent(obj, value = "Classification")
+  tmp_obj <- subset(x = tmp_obj, idents = pop)
+  tmp_obj <- SetIdent(tmp_obj, value = "orig.ident")
+  culture_markers[[pop]] <- FindAllMarkers(object = tmp_obj, return.thresh = 1e-05, 
+                                           random.seed = 300, test.use = "MAST", 
+                                           min.cells.group = 5)
+}
+
+# pairwise style
+
+culture_markers_paiwise <- list(C02vsC04 = list(), 
+                                C03vsC02 = list(),
+                                C03vsC04 = list())
+
+for(comp in names(culture_markers_paiwise)){
+  print(paste0("Running comparison: ", comp))
+  for(pop in unique(obj$Classification)){
+    print(paste0("Running population: ", pop))
+    tmp_obj <- SetIdent(obj, value = "Classification")
+    tmp_obj <- subset(x = tmp_obj, idents = pop)
+    
+    id1 <- strsplit(comp,split = "vs")[[1]][1]
+    id2 <- strsplit(comp,split = "vs")[[1]][2]
+    
+    # check number of cells (if less than 5 skip population)
+    id1_l <- length(which(tmp_obj$orig.ident == id1))
+    id2_l <- length(which(tmp_obj$orig.ident == id2))
+    
+    print(paste0("Group 1 n°cells: ", id1_l, "; Group 2 n°cells: ", id2_l))
+    
+    if(id1_l < 5 | id2_l < 5){
+      print("Skipping comparison ....")
+      next
+    }else{
+      tmp_obj <- SetIdent(tmp_obj, value = "orig.ident")
+      culture_markers_paiwise[[comp]][[pop]] <- FindMarkers(object = tmp_obj, 
+                                                            ident.1 = id1,
+                                                            ident.2 = id2, 
+                                                            random.seed = 300, 
+                                                            test.use = "MAST",
+                                                            logfc.threshold = 0, # for reporting logFC even if not significant
+                                                            min.cells.group = 5)
+    }
+  }
+}
+
+# Markers directory
+marker_dir <- "Supplemental_Fig2/"
+dir.create(marker_dir)
+
+saveRDS(culture_markers, file = paste0(marker_dir,"culture_markers_Classification.rds"))
+saveRDS(culture_markers_paiwise, file = paste0(marker_dir,"culture_markers_paiwise_Classification.rds"))
+
+write.xlsx(x = culture_markers, file = paste0(marker_dir, "culture_markers_Classification.xlsx"))
+for(i in names(culture_markers_paiwise)){
+  write.xlsx(x = culture_markers_paiwise[[i]], file = paste0(marker_dir, "culture_markers_Classification_",i,".xlsx"))
+}
+
+### TimeCourse - like analysis (identify patterns) ###
+
+genes2evaluate <- list()
+
+for(pop in unique(obj$Classification)){ 
+  
+  Z <- culture_markers[[pop]]
+  findall <- length(unique(Z[Z$p_val_adj < 0.01,]$gene))
+  
+  A1 <- culture_markers_paiwise$C02vsC04[[pop]]
+  B1 <- culture_markers_paiwise$C03vsC02[[pop]]
+  C1 <- culture_markers_paiwise$C03vsC04[[pop]]
+  
+  # only significative with logFC > 0.25
+  
+  agenes <- NULL
+  bgenes <- NULL
+  cgenes <- NULL
+  
+  if(!is.null(nrow(A1))){
+    A <- A1[A1$p_val_adj < 0.01 & abs(A1$avg_log2FC) > 0.25,]
+    agenes <- rownames(A)
+  }else{
+    print("no diff genes in C02vsC04 !")
+  }
+  
+  if(!is.null(nrow(B1))){
+    B <- B1[B1$p_val_adj < 0.01 & abs(B1$avg_log2FC) > 0.25,]
+    bgenes <- rownames(B)
+  }else{
+    print("no diff genes in C03vsC02 !")
+  }
+  
+  if(!is.null(nrow(C1))){
+    C <- C1[C1$p_val_adj < 0.01 & abs(C1$avg_log2FC) > 0.25,]
+    cgenes <- rownames(C)
+  }else{
+    print("no diff genes in C03vsC04 !")
+  }
+  
+  # numero geni totali derivanti da confronti pairwise
+  pairwiseunion.num <- length(unique(c(agenes,
+                                       bgenes,
+                                       cgenes)
+                                     )
+                              )
+  
+  # number of common genes between global test and (union of all pair-wise comparisons)
+  
+  common <-  length(base::intersect(x = unique(Z[Z$p_val_adj < 0.01,]$gene), 
+                                    y = unique(c(agenes,
+                                                 bgenes,
+                                                 cgenes
+                                    ))
+  ))
+  
+  print(paste0("Population ", pop, 
+               " fulltest: ", findall, 
+               "; pairwise union: ", pairwiseunion.num,
+               "; shared: ", common))
+  
+  tmpintersection <- base::intersect(x = unique(Z[Z$p_val_adj < 0.01,]$gene), 
+                                           y = unique(c(agenes,
+                                                        bgenes,
+                                                        cgenes
+                                           ))
+                                           )
+  if(!is.null(tmpintersection)){
+    genes2evaluate[[pop]] <- tmpintersection
+  }else{
+    next
+  }
+  
+}
+
+for(i in names(genes2evaluate)){
+  writeLines(text = genes2evaluate[[i]], con = paste0(marker_dir, "Common_markers_complete-pairwise_Classification_",i,".txt"))
+}
+
+for(pop in names(genes2evaluate)){ 
+  
+  mydata <- data.frame(row.names = genes2evaluate[[pop]])
+  
+  A1 <- culture_markers_paiwise$C02vsC04[[pop]]
+  B1 <- culture_markers_paiwise$C03vsC02[[pop]]
+  C1 <- culture_markers_paiwise$C03vsC04[[pop]]
+  
+  agenes <- NULL
+  bgenes <- NULL
+  cgenes <- NULL
+  
+  if(!is.null(nrow(A1))){
+    A <- A1[A1$p_val_adj < 0.01 & abs(A1$avg_log2FC) > 0.25,]
+    agenes <- rownames(A)
+  }else{
+    cat("no diff genes in C02vsC04 !")
+  }
+  
+  if(!is.null(nrow(B1))){
+    B <- B1[B1$p_val_adj < 0.01 & abs(B1$avg_log2FC) > 0.25,]
+    bgenes <- rownames(B)
+  }else{
+    cat("no diff genes in C03vsC02 !")
+  }
+  
+  if(!is.null(nrow(C1))){
+    C <- C1[C1$p_val_adj < 0.01 & abs(C1$avg_log2FC) > 0.25,]
+    cgenes <- rownames(C)
+  }else{
+    cat("no diff genes in C03vsC04 !")
+  }
+  
+  ##### Report only genes that are significantly different and with logFC > 0.25 ######
+  
+  index1 <- which(rownames(A1) %in% intersect(agenes,genes2evaluate[[pop]]))
+  index2 <- which(rownames(B1) %in% intersect(bgenes,genes2evaluate[[pop]]))
+  index3 <- which(rownames(C1) %in% intersect(cgenes,genes2evaluate[[pop]]))
+  
+  if(length(index1) != 0){
+    mydata <- merge.data.frame(mydata, A1[index1,"avg_log2FC",drop = F], by.x = 0, by.y = 0, all.x = T, sort = F)
+    rownames(mydata) <- mydata$Row.names
+    mydata$Row.names <- NULL
+    base::colnames(mydata)[1] <- "C02vsC04"
+  }else{
+    mydata[["C02vsC04"]] <- NA
+  }
+  
+  if(length(index2) != 0){
+    mydata <- merge.data.frame(mydata, B1[index2,"avg_log2FC",drop = F], by.x = 0, by.y = 0, all.x = T, sort = F)
+    rownames(mydata) <- mydata$Row.names
+    mydata$Row.names <- NULL
+    base::colnames(mydata)[2] <- "C03vsC02"
+  }else{
+    mydata[["C03vsC02"]] <- NA
+  }
+  
+  if(length(index3) != 0){
+    mydata <- merge.data.frame(mydata, C1[index3,"avg_log2FC",drop = F], by.x = 0, by.y = 0, all.x = T, sort = F)
+    rownames(mydata) <- mydata$Row.names
+    mydata$Row.names <- NULL
+    base::colnames(mydata)[3] <- "C03vsC04"
+  }else{
+    mydata[["C03vsC04"]] <- NA
+  }
+  
+  #### using C4 as ref point - starting point 
+  
+  mydata_ref_C04 <- data.frame(row.names = genes2evaluate[[pop]])
+  mydata_ref_C04$C04 <- 0
+  mydata_ref_C04$C02 <- 0
+  mydata_ref_C04$C03 <- 0
+  mydata_ref_C04[rownames(mydata),"C02"] <- mydata[rownames(mydata),"C02vsC04"]
+  mydata_ref_C04[rownames(mydata),"C03"] <- mydata[rownames(mydata),"C03vsC04"]
+  
+  saveRDS(object = mydata, file =paste0(pop,"/Pairwise_tests_significant_Classification_", pop ,".rds"))
+  saveRDS(object = mydata_ref_C04, file = paste0(pop,"/Pairwise_tests_significant_Classification_", pop ,"_C04_baseline.rds"))
+}
+
+###### Binarized mydata  #######
+
+expr_matrix <- list()
+
+for(pop in names(genes2evaluate)){ 
+#for(pop in c("10","11")){ 
+  
+  mydata <- readRDS(file = paste0(pop,"/Pairwise_tests_significant_Classification_", pop ,".rds"))
+  mydata_bin <- mydata
+  mydata_bin$Pattern <- "ND"
+  mydata_bin$PatternLight <- "ND"
+  for(i in 1:nrow(mydata)){
+    
+    gene <- rownames(mydata_bin)[i]
+    #print(paste0("row ", i, " gene: ", gene))
+    
+    # Starting with patterns including NAs from left to right (first to third comparison)
+    
+    # Pattern: nsns(UP)
+    if(is.na(mydata[gene,1]) & is.na(mydata[gene,2]) & mydata[gene,3] > 0){
+      mydata_bin[gene,] <- c(0,0,0.5,"nsns(UP)","nsnsUP") 
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ", paste0(0,0,0.5,"nsns(UP)","nsnsUP", sep = "_")))
+      next
+    }
+    
+    # Pattern: nsns(DW)
+    if(is.na(mydata[gene,1]) & is.na(mydata[gene,2]) & mydata[gene,3] < 0){
+      mydata_bin[gene,] <- c(0,0,-0.5,"nsns(DW)","nsnsDW") 
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,0,-0.5,"nsns(DW)","nsnsDW", sep = "_")))
+      next
+    }
+    
+    # Pattern: nsDW(ns)
+    if(is.na(mydata[gene,1]) & mydata[gene,2] < 0 & is.na(mydata[gene,3])){
+      mydata_bin[gene,] <- c(0,0,-1,"nsDW(ns)","nsDW") 
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,0,-1,"nsDW(ns)","nsDW", sep = "_")))
+      next
+    }
+    
+    # Pattern: nsDW(DW)
+    if(is.na(mydata[gene,1]) & mydata[gene,2] < 0 & mydata[gene,3] < 0){
+      mydata_bin[gene,] <- c(0,0,-1,"nsDW(DW)","nsDW") 
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,0,-1,"nsDW(DW)","nsDW", sep = "_")))
+      next
+    }
+    
+    # Pattern: nsUP(ns)
+    if(is.na(mydata[gene,1]) & mydata[gene,2] > 0 & is.na(mydata[gene,3])){
+      mydata_bin[gene,] <- c(0,0,1,"nsUP(ns)","nsUP") 
+      print(paste0(pop, ": row ", i, " gene: ", gene," pattern: ", paste0(0,0,1,"nsUP(ns)","nsUP", sep = "_")))
+      next
+    }
+    
+    # Pattern: nsUP(UP)
+    if(is.na(mydata[gene,1]) & mydata[gene,2] > 0 & mydata[gene,3] > 0){
+      mydata_bin[gene,] <- c(0,0,1,"nsUP(UP)","nsUP") 
+      print(paste0(pop, ": row ", i, " gene: ", gene," pattern: ", paste0(0,0,1,"nsUP(UP)","nsUP", sep = "_")))
+      next
+    }
+    
+    #### Condition with NA at second column
+    
+    # Pattern: UPns(ns)
+    if(mydata[gene,1] > 0 & is.na(mydata[gene,2]) & is.na(mydata[gene,3])){
+      mydata_bin[gene,] <- c(0,1,1,"UPns(ns)","UPns")
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,1,1,"UPns(ns)","UPns", sep = "_")))
+      next
+    }
+    
+    # Pattern: UPns(UP)
+    if(mydata[gene,1] > 0 & is.na(mydata[gene,2]) & mydata[gene,3] > 0){
+      mydata_bin[gene,] <- c(0,1,1,"UPns(UP)","UPns") 
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,1,1,"UPns(UP)","UPns", sep = "_")))
+      next
+    }
+    
+    # Pattern: DWns(ns)
+    if(mydata[gene,1] < 0 & is.na(mydata[gene,2]) & is.na(mydata[gene,3])){
+      mydata_bin[gene,] <- c(0,-1,-1,"DWns(ns)","DWns") # cluster 13
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,-1,"DWns(ns)","DWns", sep = "_")))
+      next
+    }
+    
+    # Pattern: DWns(DW)
+    if(mydata[gene,1] < 0 & is.na(mydata[gene,2]) & mydata[gene,3] < 0){
+      mydata_bin[gene,] <- c(0,-1,-1,"DWns(DW)","DWns") # cluster 14
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,-1,"DWns(DW)","DWns", sep = "_")))
+      next
+    }
+    
+    # Pattern: DWns(UP)
+    if(mydata[gene,1] < 0 & is.na(mydata[gene,2]) & mydata[gene,3] > 0){
+      mydata_bin[gene,] <- c(0,-1,-1,"DWns(UP)","DWns") # cluster 14
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,-1,"DWns(UP)","DWns", sep = "_")))
+      next
+    }
+    
+    ### NA at 3rd column ###
+    
+    # Pattern: DWUP(ns) 
+    if(mydata[gene,1] < 0 & mydata[gene,2] > 0 & is.na(mydata[gene,3])){
+      mydata_bin[gene,] <- c(0,-1,0,"DWUP(ns)","DWUP") # cluster 17
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,0,"DWUP(ns)","DWUP", sep = "_")))
+      next
+    }
+    
+    # Pattern: UPDW(ns)
+    if(mydata[gene,1] > 0 & mydata[gene,2] < 0 & is.na(mydata[gene,3])){
+      mydata_bin[gene,] <- c(0,1,0,"UPDW(ns)","UPDW") 
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,1,0,"UPDW(ns)","UPDW", sep = "_")))
+      next
+    }
+    
+    # Pattern: UPUP(UP)
+    if(mydata[gene,1] > 0 & mydata[gene,2] > 0 & mydata[gene,3] > 0){
+      mydata_bin[gene,] <- c(0,1,2,"UPUP(UP)","UPUP")
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,1,2,"UPUP(UP)","UPUP", sep = "_")))
+      next
+    }
+    # Pattern: UPDW(UP)
+    if(mydata[gene,1] > 0 & mydata[gene,2] < 0 & mydata[gene,3] > 0){
+      mydata_bin[gene,] <- c(0,1,0.5,"UPDW(UP)","UPDW")
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,1,0.5,"UPDW(UP)","UPDW", sep = "_")))
+      next
+    }
+    # Pattern: UPDW(DW)
+    if(mydata[gene,1] > 0 & mydata[gene,2] < 0 & mydata[gene,3] < 0){
+      mydata_bin[gene,] <- c(0,1,-0.5,"UPDW(DW)","UPDW")
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,1,-0.5,"UPDW(DW)","UPDW", sep = "_")))
+      next
+    }
+    
+    # Pattern: DWUP(UP)
+    if(mydata[gene,1] < 0 & mydata[gene,2] > 0 & mydata[gene,3] > 0){
+      mydata_bin[gene,] <- c(0,-1,0.5,"DWUP(UP)","DWUP") 
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,0.5,"DWUP(UP)","DWUP", sep = "_")))
+      next
+    }
+    # Pattern: DWUP(DW)
+    if(mydata[gene,1] < 0 & mydata[gene,2] > 0 & mydata[gene,3] < 0){
+      mydata_bin[gene,] <- c(0,-1,-0.5,"DWUP(DW)","DWUP")
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,-0.5,"DWUP(DW)","DWUP", sep = "_")))
+      next
+    }
+    # Pattern: DWDW(DW)
+    if(mydata[gene,1] < 0 & mydata[gene,2] < 0 & mydata[gene,3] < 0){
+      mydata_bin[gene,] <- c(0,-1,-2,"DWDW(DW)","DWDW")
+      print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,-2,"DWDW(DW)","DWDW", sep = "_")))
+      next
+    }
+  }
+  
+  
+  saveRDS(object = mydata_bin, file = paste0(pop,"/Pairwise_tests_significant_Classification_",pop,"_binary.rds"))
+  
+  mydata_bin_df <- as.data.frame(mydata_bin)
+  mydata_bin_df$gene <- rownames(mydata_bin_df)
+  colnames(mydata_bin_df) <- c("C04","C02","C03","Pattern","PatternLight","gene")
+  m <- melt(mydata_bin_df, measure.vars = c("C04","C02","C03"), id.vars = c("gene","Pattern","PatternLight"))
+  m$value <- as.numeric(m$value)
+  
+  mytheme <- gridExtra::ttheme_default(
+    core = list(fg_params=list(cex = 0.5)),
+    colhead = list(fg_params=list(cex = 0.5)),
+    rowhead = list(fg_params=list(cex = 0.5)))
+  
+  m_cast <- dcast(m,formula = gene + Pattern ~ variable, value.var = 'value')
+  annotation_table <- as.data.frame(table(m_cast$Pattern))
+  colnames(annotation_table) <- c("Pattern","Ngenes")
+  
+  saveRDS(m, paste0(pop, "/Patterns_dataframe_Classification_",pop,".rds"))
+  saveRDS(m_cast, paste0(pop, "/Patterns_dataframe_casted_Classification_",pop,".rds"))
+  saveRDS(annotation_table, paste0(pop,"/Annotation_table_Classification_",pop,".rds"))
+  
+  tmpdf <- as.data.frame(table(m$Pattern))
+  new_labels <- paste0(tmpdf$Var1, "(", tmpdf$Freq, ")")
+  names(new_labels) <- tmpdf$Var1
+  
+  png(paste0(pop, "/",pop, "_patterns_Classification.png"), width = 6, height = 6, units = "in", res = 300)
+  print(ggplot(m, mapping = aes(x = variable, y = value, color = Pattern, group = gene)) + geom_line() + #, xmin=-12, xmax=-7, ymin=-12, ymax=-7) +
+          facet_wrap( . ~ Pattern, ncol = 4, labeller = labeller(Pattern = new_labels)
+          ) +
+          ggtitle(label = paste0("Culture effect - ", pop)) +
+          theme(plot.title = element_text(hjust = 0.5)))
+  dev.off()
+  
+  # lightpattern 
+  
+  m_cast <- dcast(m,formula = gene + PatternLight ~ variable, value.var = 'value')
+  annotation_table <- as.data.frame(table(m_cast$PatternLight))
+  colnames(annotation_table) <- c("PatternLight","Ngenes")
+  
+  saveRDS(m, paste0(pop, "/PatternsLight_dataframe_Classification_",pop,".rds"))
+  saveRDS(m_cast, paste0(pop, "/PatternsLight_dataframe_casted_Classification_",pop,".rds"))
+  saveRDS(annotation_table, paste0(pop,"/Annotation_tableLight_Classification_",pop,".rds"))
+  
+  tmpdf <- as.data.frame(table(m$PatternLight))
+  new_labels <- paste0(tmpdf$Var1, "(", tmpdf$Freq, ")")
+  names(new_labels) <- tmpdf$Var1
+  
+  png(paste0(pop, "/",pop, "_patternsLight_Classification.png"), width = 6, height = 6, units = "in", res = 300)
+  print(ggplot(m, mapping = aes(x = variable, y = value, color = PatternLight, group = gene)) + geom_line() + #, xmin=-12, xmax=-7, ymin=-12, ymax=-7) +
+          facet_wrap( . ~ PatternLight, ncol = 4, labeller = labeller(PatternLight = new_labels)
+          ) +
+          ggtitle(label = paste0("Culture effect - ", pop)) +
+          theme(plot.title = element_text(hjust = 0.5)))
+  dev.off()
+  
+  genes2evaluate[[pop]] <- readLines(con = paste0(marker_dir,"/Common_markers_complete-pairwise_Classification_",pop,".txt"))
+  
+  expr_matrix[[pop]] <- as.data.frame(AverageExpression(object = subset(obj, cells = WhichCells(object = obj, expression = Classification == pop)), 
+                                                        features = genes2evaluate[[pop]], group.by = "orig.ident")$RNA)
+  
+  
+  saveRDS(expr_matrix[[pop]], paste0(pop, "/Expression_matrix_Classification_",pop,".rds"))
+  
+}
+
+write.xlsx(x = expr_matrix, file = "Expression_matrices_modulated_genes_Classification.xlsx", asTable = T, overwrite = T,
+          rowNames = T, firstRow = T)
+
+###### heatmaps average expression ######
+###  with light patterns ###
+
+Patterns_dataframe_casted <- list()
+
+for(pop in names(genes2evaluate)){ 
+Patterns_dataframe_casted[[pop]] <- readRDS(paste0(pop, "/PatternsLight_dataframe_casted_Classification_",pop,".rds"))
+}
+
+for(pop in names(genes2evaluate)){
+#for(pop in c("0")){
+  outdir <- paste0(pop,"/")
+  dir.create(outdir, showWarnings = F)
+  for(i in unique(Patterns_dataframe_casted[[pop]]$PatternLight)){
+    ttt <- Patterns_dataframe_casted[[pop]]
+    goi <- ttt[which(ttt$PatternLight == i),]$gene
+    if(length(goi) < 3){
+      next
+    }
+    cur.mat <- expr_matrix[[pop]][goi,]
+    cur.mat <- subset(cur.mat, select = c("C04","C02","C03"))
+    cur.mat <- as.matrix(cur.mat)
+    pheatmap::pheatmap(mat = cur.mat, 
+                       scale = "none",cluster_cols = F,
+                       display_numbers = T,
+                       main = paste0("Avg Expression Pattern ",i),
+                       filename = paste0(outdir,pop,"_pattern_",i,".pdf"))
+  }
+  
+  d <- Patterns_dataframe_casted[[pop]][,c("gene","PatternLight")]
+  rownames(d) <- d$gene
+  colnames(d)[2] <- "Pattern"
+  d$gene <- NULL
+  d$Pattern <- factor(x = d$Pattern, levels = sort(unique(d$Pattern)))
+  
+  mycolors <- brewer.pal(n = length(unique(d$Pattern)), name = "Paired")
+  names(mycolors) <- unique(d$Pattern)
+  mycolors <- list(cluster = mycolors)
+  fullmat <- as.matrix(expr_matrix[[pop]][ order(row.names(expr_matrix[[pop]])), ])
+  
+  pheatmap::pheatmap(fullmat, scale = "row", 
+                     show_rownames = F, annotation_row = d, 
+                     annotation_colors = mycolors,
+                     filename = paste0(outdir,pop,"_all_clusters.pdf"))
+}
+
+
+enrichmentdir <- "enrichment/"
+dir.create(enrichmentdir) 
+m_t2g <- read.gmt(gmtfile = "msigdb.v2023.1.Hs.symbols.gmt")
+
+#### Perform enrichment #####
+
+for(pop in names(genes2evaluate)){
+  
+  tmp_obj <- SetIdent(object = obj, value = "Classification")
+  tmp_obj <- subset(tmp_obj, idents = pop)
+  myuni <- names(which(rowSums(tmp_obj@assays$RNA@counts) > 0)) # universe with genes expressed in at least 1 cell in current population
+  
+  writeLines(text = myuni, con = paste0(enrichmentdir,pop, "_universe.txt"))
+  enrichobj <- list()
+  
+  for(clus in levels(Patterns_dataframe_casted[[pop]]$PatternLight)){
+    
+    enrichobj[[paste0("cl_",clus)]] <- enricher(Patterns_dataframe_casted[[pop]][which(Patterns_dataframe_casted[[pop]]$PatternLight == clus),"gene"], 
+                                                TERM2GENE=m_t2g, 
+                                                universe = myuni, pvalueCutoff = 0.05)
+  }
+  
+  ###### MultiComparison ######
+  
+  genesets_clusters <- list()
+  
+  for(cl in base::unique(Patterns_dataframe_casted[[pop]]$PatternLight)){
+    genesets_clusters[[paste0("cl_",cl)]] <- Patterns_dataframe_casted[[pop]][which(Patterns_dataframe_casted[[pop]]$PatternLight == cl),"gene"]
+  }
+  
+  xx <- list()
+  for(ontocat in c("MF","BP","CC")){
+    xx[[ontocat]] <- compareCluster(genesets_clusters, fun="enrichGO", pvalueCutoff=0.05,
+                                    OrgDb='org.Hs.eg.db', keyType = "SYMBOL", universe = myuni, 
+                                    ont = ontocat)
+    xx[[ontocat]] <- pairwise_termsim(xx[[ontocat]])                     
+    pdf(file = paste0(enrichmentdir,pop,"_EnrichGO_",ontocat,".pdf"), width = 16, height = 12, paper = "a4r")
+    goplot <- emapplot(xx[[ontocat]], pie="count", cex_category=1, cex_line = 0.4, layout="kk")
+    print(goplot)
+    dev.off()
+    saveRDS(object = xx[[ontocat]] , file = paste0(enrichmentdir, pop, "_enrichGO_multi_", ontocat, ".rds"))
+  }
+  
+  saveRDS(object = genesets_clusters, file = paste0(enrichmentdir,pop, "_genesets_clusters_list.rds"))
+  saveRDS(object = enrichobj, file = paste0(enrichmentdir,pop, "_enrichobj_msigdb.rds"))
+  
+}
+
+
--- a/Supplemental_fig2_part2.R
+++ b/Supplemental_fig2_part2.R
+# Identify the a shared and common effect of the culturing in celltypes as well as cell-specific modulations.
+
+library(Seurat)
+library(MAST)
+library(reshape2)
+library(ggplot2)
+library(gridExtra)
+library(pheatmap)
+library(ComplexHeatmap)
+library(RColorBrewer)
+library(openxlsx)
+library(clusterProfiler)
+library(msigdbr)
+library(enrichplot)
+
+rootdir <- "CultureEffectClassification/"
+
+# Step 1. Identify classes of genes that fall into the different patterns ####
+# Pattern UP: UPUP, nsUP, UPns [consistently UP, late UP response, early UP response]
+# Pattern DW: DWDW, nsDW, DWns [consistently DW, late DW response, early DW response]
+# Transient UP response: UPDW
+# Transient DW response: DWUP
+
+# cicle over populations and store according to categories above #
+
+populations <- c("EryProg","GMP","HSC","HSPC","MCP",
+                 "MEP-cycling_I","MEP-cycling_I", 
+                 "MEP-EryProg-I", "MEP-EryProg-II",
+                 "MkProg","MLP","MonoProg",
+                 "Erythrocytes","Prog-cycling_I","Prog-cycling_II"#,"Ribo+"
+                 )
+
+dframe_X_classes <- list()
+genes_X_classes <- list()
+
+for(popcell in populations){
+  mydata <- readRDS(paste0(rootdir,popcell,"/Pairwise_tests_significant_Classification_",popcell,"_binary.rds"))
+  mydata$Class <- "undetermined"
+  mydata$Class[which(mydata$PatternLight %in% c("UPUP","nsUP","UPns","nsnsUP"))] <- "UP"
+  mydata$Class[which(mydata$PatternLight %in% c("DWDW","nsDW","DWns","nsnsDW"))] <- "DW"
+  mydata$Class[which(mydata$PatternLight %in% c("UPDW"))] <- "trUP"
+  mydata$Class[which(mydata$PatternLight %in% c("DWUP"))] <- "trDW"
+  dframe_X_classes[[popcell]] <- split(x = mydata, f = mydata$Class)
+  for(j in names(dframe_X_classes[[popcell]])){
+    genes_X_classes[[popcell]][[j]] <- rownames(dframe_X_classes[[popcell]][[j]])
+  }
+}
+
+# do upsetR on specific patterns for all ####
+
+library(UpSetR)
+
+class_X_pop <- list()
+
+for(popcell in populations){
+  for(k in names(genes_X_classes[[popcell]])){
+    class_X_pop[[k]][[popcell]] <- genes_X_classes[[popcell]][[k]]
+  }
+}
+
+
+# create upset input and create chart ####
+
+PaperDir <- paste0(rootdir, "Paper/")
+dir.create(PaperDir)
+
+upsetr.input <- list()
+upsetr.input_string <- list()
+
+for(j in names(class_X_pop)){
+  upsetr.input[[j]] <- fromList(class_X_pop[[j]])
+  rownames(upsetr.input[[j]]) <- unique(unlist(class_X_pop[[j]]))
+  pdf(file = paste0(PaperDir, "Pattern_",j,"_UpSetR_by_Populations.pdf"), width = 18, height = 6)
+  print(upset(upsetr.input[[j]] , order.by = "freq", sets = rev(colnames(upsetr.input[[j]])), keep.order = TRUE, nintersects = NA))
+  dev.off()
+}
+
+class_X_pop_symplified <- list(DW = NULL, UP = NULL,
+                               trUP = NULL, trDW = NULL)
+
+for(k in names(class_X_pop_symplified)){
+  class_X_pop_symplified[[k]]$MyeLineage <- c(class_X_pop[[k]]$GMP)
+  class_X_pop_symplified[[k]]$MonoProg <- class_X_pop[[k]]$MonoProg
+  
+  class_X_pop_symplified[[k]]$MCP <- class_X_pop[[k]]$MCP
+  class_X_pop_symplified[[k]]$MkEryLineage <- c(class_X_pop[[k]]$EryProg, class_X_pop[[k]]$`MEP-cycling_I`, 
+                                       class_X_pop[[k]]$`MEP-EryProg-I`, class_X_pop[[k]]$`MEP-EryProg-II`, 
+                                       class_X_pop[[k]]$MkProg, class_X_pop[[k]]$Erythrocytes)
+  class_X_pop_symplified[[k]]$ProgCycling <- c(class_X_pop[[k]]$`Prog-cycling_I`, 
+                                               class_X_pop[[k]]$`Prog-cycling_II`)
+  class_X_pop_symplified[[k]]$HSC <- c(class_X_pop[[k]]$HSC, class_X_pop[[k]]$MLP)
+  class_X_pop_symplified[[k]]$HSPC <- c(class_X_pop[[k]]$HSPC)
+}
+
+upsetr.input.simple <- list()
+upsetr.input.simple_string <- list()
+
+colpalette <- c("#A6CEE3","#B2DF8A","#FDBF6F","#E78AC3","#FFFF99","#E31A1C","darkgrey")
+Labelset <- c("HSC","HSPC","MonoProg","MyeLineage","MCP","MkEryLineage","ProgCycling")
+names(Labelset) <- colpalette
+
+for(j in names(class_X_pop_symplified)){
+  upsetr.input.simple[[j]] <- fromList(class_X_pop_symplified[[j]])
+  rownames(upsetr.input.simple[[j]]) <- unique(unlist(class_X_pop_symplified[[j]]))
+   
+  myset <- Labelset[Labelset %in% colnames(upsetr.input.simple[[j]])]
+  
+  pdf(file = paste0(PaperDir, "Pattern_",j,"_UpSetR_by_Populations_simply.pdf"), width = 18, height = 6)
+  print(upset(upsetr.input.simple[[j]] , order.by = "freq", sets.bar.color = names(myset),
+              sets = myset,
+              keep.order = TRUE, nintersects = NA))
+  dev.off()
+}
+
+
+### Which are the common pathways that are modulated across celltypes upon culturing #####
+
+for(j in c("DW","UP")){
+  tmpdata <- upsetr.input.simple[[j]]
+  tmpdata_shared <- tmpdata[rowSums(tmpdata) > 3,]
+  pattern_data <- apply( tmpdata[ , colnames(tmpdata) ] , 1 , 
+                                                 paste , collapse = "")
+  upsetr.input.simple_string[[j]][["shared"]] <- pattern_data[rownames(tmpdata_shared)]
+  
+  for (k in colnames(tmpdata)) {
+    test <- "0000000"
+    substr(x = test , start = which(colnames(tmpdata) == k), stop = which(colnames(tmpdata) == k)) <- "1"
+    print(test)
+    upsetr.input.simple_string[[j]][[k]] <- pattern_data[pattern_data == test]
+  }
+  
+  ### detailed version 
+  
+  tmpdata <- upsetr.input[[j]]
+  tmpdata_shared <- tmpdata[rowSums(tmpdata) > 9,]
+  pattern_data <- apply( tmpdata[ , colnames(tmpdata) ] , 1 , 
+                         paste , collapse = "")
+  upsetr.input_string[[j]][["shared"]] <- pattern_data[rownames(tmpdata_shared)]
+  
+  for (k in colnames(tmpdata)) {
+    test <- "00000000000000"
+    substr(x = test , start = which(colnames(tmpdata) == k), stop = which(colnames(tmpdata) == k)) <- "1"
+    print(test)
+    upsetr.input_string[[j]][[k]] <- pattern_data[pattern_data == test]
+  }
+}
+
+#### Perform enrichment #####
+
+# create universes for combined simple labels ####
+
+universes <- list()
+universes_symplified <- list()
+
+for(m in colnames(upsetr.input$DW)){
+  universes[[m]] <- readLines(paste0(rootdir, "enrichment/",m,"_universe.txt"))
+}
+
+universes_symplified$MyeLineage <- unique(universes$GMP)
+universes_symplified$MonoProg <- unique(universes$MonoProg)
+universes_symplified$MkEryLineage <- unique(c(universes$EryProg, universes$`MEP-cycling_I`, 
+                                     universes$`MEP-EryProg-I`, universes$`MEP-EryProg-II`, 
+                                     universes$MkProg, universes$Erythrocytes))
+universes_symplified$ProgCycling <- unique(c(universes$`Prog-cycling_I`, 
+                                     universes$`Prog-cycling_II`))
+universes_symplified$HSC <- unique(c(universes$HSC, universes$MLP))
+universes_symplified$HSPC <- unique(c(universes$HSPC))
+universes_symplified$MCP <- unique(c(universes$MCP))
+universes_symplified$shared <- unique(unlist(universes_symplified))
+
+universes$shared <- unique(unlist(universes))
+
+# Perform enrichment #####
+
+enrichmentdir <- "PatternsEnrichments/"
+dir.create(enrichmentdir) 
+m_t2g <- read.gmt(gmtfile = "h.all.v2023.1.Hs.symbols.gmt")
+
+#### Perform enrichment #####
+
+enrichobj <- list()
+upsetr.input.simple_names <- list()
+for(j in names(upsetr.input.simple_string)){
+  for(i in names(upsetr.input.simple_string[[j]])){
+    upsetr.input.simple_names[[j]][[i]] <- names(upsetr.input.simple_string[[j]][[i]])
+  }
+}
+
+
+for(a in c("DW","UP")){
+  xx <- compareCluster(upsetr.input.simple_names[[a]], fun="enrichGO", pvalueCutoff=0.05,
+                       OrgDb='org.Hs.eg.db', keyType = "SYMBOL", universe = universes_symplified[["shared"]], 
+                       ont = "BP")
+  saveRDS(xx, file = paste0(enrichmentdir, "Pattern_enrichment_simplified_BP_",a,".rds"))
+  # if(nrow(xx@compareClusterResult) < 2){
+  #   next
+  # }
+  xx <- pairwise_termsim(xx)                     
+  pdf(file = paste0(enrichmentdir, "Pattern_enrichment_simplified_BP_",a,".pdf"), width = 16, height = 12, paper = "a4r")
+  goplot <- emapplot(xx, pie="count", cex_category=1, cex_line = 0.4, layout="kk")
+  print(goplot)
+  dev.off()
+  
+  for(j in names(universes_symplified)){
+  enrichobj[[j]][[a]] <- enricher(names(upsetr.input.simple_string[[a]][[j]]), TERM2GENE=m_t2g,
+                               universe = universes_symplified[[j]], pvalueCutoff = 0.05)
+  }
+}
+
+saveRDS(enrichobj,"Paper/Hallmarks_enrichments_patternSimple_populations.rds")
+saveRDS(upsetr.input_string, file = "Paper/upsetr.input_string.rds")
+saveRDS(upsetr.input.simple_string, file = "Paper/upsetr.input.simple_string.rds")
+
+## Creating common plot - HALLMARKS ####
+
+for(i in names(enrichobj)){
+  for(k in names(enrichobj[[i]])){
+    enrichobj[[i]][[k]]@result$Pop <- i
+    enrichobj[[i]][[k]]@result$Direction <- k
+    enrichobj[[i]][[k]]@result$ID <- gsub(x = enrichobj[[i]][[k]]@result$ID, pattern = "HALLMARK_", replacement = "")
+    enrichobj[[i]][[k]]@result$Description <- gsub(x = enrichobj[[i]][[k]]@result$Description, pattern = "HALLMARK_", replacement = "")
+    rownames(enrichobj[[i]][[k]]@result) <- gsub(x = rownames(enrichobj[[i]][[k]]@result), pattern = "HALLMARK_", replacement = "")
+  }
+}
+
+df <- data.frame()
+
+for(i in names(enrichobj)){
+  for(k in names(enrichobj[[i]])){
+    df <- rbind.data.frame(df, enrichobj[[i]][[k]]@result)
+  }
+}
+
+df_binarized <- df
+df_binarized$TileVal <- 0
+df_binarized$TileVal[which(df_binarized$p.adjust < 0.05 & df_binarized$Direction == "UP")] <- 1
+df_binarized$TileVal[which(df_binarized$p.adjust < 0.05 & df_binarized$Direction == "DW")] <- -1
+
+library(reshape2)
+df_binarized <- df_binarized[df_binarized$TileVal != 0,]
+df_binarized_mt <- dcast(data = df_binarized, formula = ID ~ Pop + Direction, value.var = "TileVal")
+df_binarized_mt[is.na(df_binarized_mt)] <- 0
+rownames(df_binarized_mt) <- df_binarized_mt$ID
+df_binarized_mt$ID <- NULL
+
+png(filename = "Paper/Hallmarks_enrichments_pheatmap.png", width = 12, height = 6, units = "in", res = 300)
+pheatmap(df_binarized_mt)
+dev.off()
+
+library(ggplot2)
+png(filename = "Paper/Hallmarks_enrichments_tileplot.png", width = 12, height = 6, units = "in", res = 300)
+ggplot(data = df_binarized, mapping = aes(x = Direction, y = ID, fill = TileVal)) + geom_tile() +
+  facet_grid(. ~ Pop, scales = "free", space = "free") + 
+  scale_fill_distiller(palette = "RdBu", direction = -1)
+dev.off()
+
+saveRDS(object = df_binarized_mt, "Paper/Hallmarks_enrichments_pheatmap_data.rds")
+saveRDS(object = df_binarized, "Paper/Hallmarks_enrichments_tileplot_data.rds")
+
+library(enrichplot)
+
+UP_PLOT <- cnetplot(enrichobj$shared$UP, showCategory = 20, cex_label_gene = 0.6,
+         cex_label_category = 1, color_category = "#E31A1C", ) + 
+  ggtitle("Enrichment from UP genes") + 
+  #ggtitle("Enriched hallmarks from UP genes across culturing") + 
+  theme(plot.title = element_text(hjust = 0.5), 
+        legend.position = "bottom")
+
+DW_PLOT <- cnetplot(enrichobj$shared$DW, showCategory = 20, cex_label_gene = 0.6,
+                    cex_label_category = 1,  color_category = "#1F78B4") +
+  ggtitle("Enrichment from DW genes") + 
+  #ggtitle("Enriched hallmarks from DW genes across culturing") + 
+  theme(plot.title = element_text(hjust = 0.5), 
+        legend.position = "bottom")
+
+library(gridExtra)
+
+png(filename = "Paper/Hallmark_enriched_terms_common_UP_DW_modulated_genes_across_populations.png", 
+    width = 24, height = 12, units = "in", res = 300)
+grid.arrange(UP_PLOT, DW_PLOT, ncol = 2, widths= c(0.5, 0.5))
+dev.off()
+
+png(filename = "Paper/Hallmark_enriched_terms_common_UP_modulated_genes_across_populations.png", 
+    width = 12, height = 12, units = "in", res = 300)
+UP_PLOT + theme(plot.title = element_blank())
+dev.off()
+
+png(filename = "Paper/Hallmark_enriched_terms_common_DW_modulated_genes_across_populations.png", 
+    width = 12, height = 12, units = "in", res = 300)
+DW_PLOT + theme(plot.title = element_blank())
+dev.off()
+
+###
+
+png(filename = "Paper/Hallmark_enriched_terms_common_UP_DW_modulated_genes_across_populations_B.png", 
+    width = 12, height = 9, units = "in", res = 300)
+grid.arrange(UP_PLOT, DW_PLOT, ncol = 2, widths= c(0.65, 0.35))
+dev.off()
+
+pdf(file = "Paper/Hallmark_enriched_terms_common_UP_DW_modulated_genes_across_populations.pdf", 
+       width = 12, height = 9)
+gridExtra::grid.arrange(UP_PLOT, DW_PLOT, ncol = 2, widths= c(0.65, 0.35))
+dev.off()