Commit 192b1ccd authored by Matteo Barcella's avatar Matteo Barcella
Browse files

Supplemental fig2 preprocessing scripts and figure plot

parent ecad0b3b
This diff is collapsed.
# Identify the a shared and common effect of the culturing in celltypes as well as cell-specific modulations.
library(Seurat)
library(MAST)
library(reshape2)
library(ggplot2)
library(gridExtra)
library(pheatmap)
library(ComplexHeatmap)
library(RColorBrewer)
library(openxlsx)
library(clusterProfiler)
library(msigdbr)
library(enrichplot)
rootdir <- "CultureEffectClassification/"
# Step 1. Identify classes of genes that fall into the different patterns ####
# Pattern UP: UPUP, nsUP, UPns [consistently UP, late UP response, early UP response]
# Pattern DW: DWDW, nsDW, DWns [consistently DW, late DW response, early DW response]
# Transient UP response: UPDW
# Transient DW response: DWUP
# cicle over populations and store according to categories above #
populations <- c("EryProg","GMP","HSC","HSPC","MCP",
"MEP-cycling_I","MEP-cycling_I",
"MEP-EryProg-I", "MEP-EryProg-II",
"MkProg","MLP","MonoProg",
"Erythrocytes","Prog-cycling_I","Prog-cycling_II"#,"Ribo+"
)
dframe_X_classes <- list()
genes_X_classes <- list()
for(popcell in populations){
mydata <- readRDS(paste0(rootdir,popcell,"/Pairwise_tests_significant_Classification_",popcell,"_binary.rds"))
mydata$Class <- "undetermined"
mydata$Class[which(mydata$PatternLight %in% c("UPUP","nsUP","UPns","nsnsUP"))] <- "UP"
mydata$Class[which(mydata$PatternLight %in% c("DWDW","nsDW","DWns","nsnsDW"))] <- "DW"
mydata$Class[which(mydata$PatternLight %in% c("UPDW"))] <- "trUP"
mydata$Class[which(mydata$PatternLight %in% c("DWUP"))] <- "trDW"
dframe_X_classes[[popcell]] <- split(x = mydata, f = mydata$Class)
for(j in names(dframe_X_classes[[popcell]])){
genes_X_classes[[popcell]][[j]] <- rownames(dframe_X_classes[[popcell]][[j]])
}
}
# do upsetR on specific patterns for all ####
library(UpSetR)
class_X_pop <- list()
for(popcell in populations){
for(k in names(genes_X_classes[[popcell]])){
class_X_pop[[k]][[popcell]] <- genes_X_classes[[popcell]][[k]]
}
}
# create upset input and create chart ####
PaperDir <- paste0(rootdir, "Paper/")
dir.create(PaperDir)
upsetr.input <- list()
upsetr.input_string <- list()
for(j in names(class_X_pop)){
upsetr.input[[j]] <- fromList(class_X_pop[[j]])
rownames(upsetr.input[[j]]) <- unique(unlist(class_X_pop[[j]]))
pdf(file = paste0(PaperDir, "Pattern_",j,"_UpSetR_by_Populations.pdf"), width = 18, height = 6)
print(upset(upsetr.input[[j]] , order.by = "freq", sets = rev(colnames(upsetr.input[[j]])), keep.order = TRUE, nintersects = NA))
dev.off()
}
class_X_pop_symplified <- list(DW = NULL, UP = NULL,
trUP = NULL, trDW = NULL)
for(k in names(class_X_pop_symplified)){
class_X_pop_symplified[[k]]$MyeLineage <- c(class_X_pop[[k]]$GMP)
class_X_pop_symplified[[k]]$MonoProg <- class_X_pop[[k]]$MonoProg
class_X_pop_symplified[[k]]$MCP <- class_X_pop[[k]]$MCP
class_X_pop_symplified[[k]]$MkEryLineage <- c(class_X_pop[[k]]$EryProg, class_X_pop[[k]]$`MEP-cycling_I`,
class_X_pop[[k]]$`MEP-EryProg-I`, class_X_pop[[k]]$`MEP-EryProg-II`,
class_X_pop[[k]]$MkProg, class_X_pop[[k]]$Erythrocytes)
class_X_pop_symplified[[k]]$ProgCycling <- c(class_X_pop[[k]]$`Prog-cycling_I`,
class_X_pop[[k]]$`Prog-cycling_II`)
class_X_pop_symplified[[k]]$HSC <- c(class_X_pop[[k]]$HSC, class_X_pop[[k]]$MLP)
class_X_pop_symplified[[k]]$HSPC <- c(class_X_pop[[k]]$HSPC)
}
upsetr.input.simple <- list()
upsetr.input.simple_string <- list()
colpalette <- c("#A6CEE3","#B2DF8A","#FDBF6F","#E78AC3","#FFFF99","#E31A1C","darkgrey")
Labelset <- c("HSC","HSPC","MonoProg","MyeLineage","MCP","MkEryLineage","ProgCycling")
names(Labelset) <- colpalette
for(j in names(class_X_pop_symplified)){
upsetr.input.simple[[j]] <- fromList(class_X_pop_symplified[[j]])
rownames(upsetr.input.simple[[j]]) <- unique(unlist(class_X_pop_symplified[[j]]))
myset <- Labelset[Labelset %in% colnames(upsetr.input.simple[[j]])]
pdf(file = paste0(PaperDir, "Pattern_",j,"_UpSetR_by_Populations_simply.pdf"), width = 18, height = 6)
print(upset(upsetr.input.simple[[j]] , order.by = "freq", sets.bar.color = names(myset),
sets = myset,
keep.order = TRUE, nintersects = NA))
dev.off()
}
### Which are the common pathways that are modulated across celltypes upon culturing #####
for(j in c("DW","UP")){
tmpdata <- upsetr.input.simple[[j]]
tmpdata_shared <- tmpdata[rowSums(tmpdata) > 3,]
pattern_data <- apply( tmpdata[ , colnames(tmpdata) ] , 1 ,
paste , collapse = "")
upsetr.input.simple_string[[j]][["shared"]] <- pattern_data[rownames(tmpdata_shared)]
for (k in colnames(tmpdata)) {
test <- "0000000"
substr(x = test , start = which(colnames(tmpdata) == k), stop = which(colnames(tmpdata) == k)) <- "1"
print(test)
upsetr.input.simple_string[[j]][[k]] <- pattern_data[pattern_data == test]
}
### detailed version
tmpdata <- upsetr.input[[j]]
tmpdata_shared <- tmpdata[rowSums(tmpdata) > 9,]
pattern_data <- apply( tmpdata[ , colnames(tmpdata) ] , 1 ,
paste , collapse = "")
upsetr.input_string[[j]][["shared"]] <- pattern_data[rownames(tmpdata_shared)]
for (k in colnames(tmpdata)) {
test <- "00000000000000"
substr(x = test , start = which(colnames(tmpdata) == k), stop = which(colnames(tmpdata) == k)) <- "1"
print(test)
upsetr.input_string[[j]][[k]] <- pattern_data[pattern_data == test]
}
}
#### Perform enrichment #####
# create universes for combined simple labels ####
universes <- list()
universes_symplified <- list()
for(m in colnames(upsetr.input$DW)){
universes[[m]] <- readLines(paste0(rootdir, "enrichment/",m,"_universe.txt"))
}
universes_symplified$MyeLineage <- unique(universes$GMP)
universes_symplified$MonoProg <- unique(universes$MonoProg)
universes_symplified$MkEryLineage <- unique(c(universes$EryProg, universes$`MEP-cycling_I`,
universes$`MEP-EryProg-I`, universes$`MEP-EryProg-II`,
universes$MkProg, universes$Erythrocytes))
universes_symplified$ProgCycling <- unique(c(universes$`Prog-cycling_I`,
universes$`Prog-cycling_II`))
universes_symplified$HSC <- unique(c(universes$HSC, universes$MLP))
universes_symplified$HSPC <- unique(c(universes$HSPC))
universes_symplified$MCP <- unique(c(universes$MCP))
universes_symplified$shared <- unique(unlist(universes_symplified))
universes$shared <- unique(unlist(universes))
# Perform enrichment #####
enrichmentdir <- "PatternsEnrichments/"
dir.create(enrichmentdir)
m_t2g <- read.gmt(gmtfile = "h.all.v2023.1.Hs.symbols.gmt")
#### Perform enrichment #####
enrichobj <- list()
upsetr.input.simple_names <- list()
for(j in names(upsetr.input.simple_string)){
for(i in names(upsetr.input.simple_string[[j]])){
upsetr.input.simple_names[[j]][[i]] <- names(upsetr.input.simple_string[[j]][[i]])
}
}
for(a in c("DW","UP")){
xx <- compareCluster(upsetr.input.simple_names[[a]], fun="enrichGO", pvalueCutoff=0.05,
OrgDb='org.Hs.eg.db', keyType = "SYMBOL", universe = universes_symplified[["shared"]],
ont = "BP")
saveRDS(xx, file = paste0(enrichmentdir, "Pattern_enrichment_simplified_BP_",a,".rds"))
# if(nrow(xx@compareClusterResult) < 2){
# next
# }
xx <- pairwise_termsim(xx)
pdf(file = paste0(enrichmentdir, "Pattern_enrichment_simplified_BP_",a,".pdf"), width = 16, height = 12, paper = "a4r")
goplot <- emapplot(xx, pie="count", cex_category=1, cex_line = 0.4, layout="kk")
print(goplot)
dev.off()
for(j in names(universes_symplified)){
enrichobj[[j]][[a]] <- enricher(names(upsetr.input.simple_string[[a]][[j]]), TERM2GENE=m_t2g,
universe = universes_symplified[[j]], pvalueCutoff = 0.05)
}
}
saveRDS(enrichobj,"Paper/Hallmarks_enrichments_patternSimple_populations.rds")
saveRDS(upsetr.input_string, file = "Paper/upsetr.input_string.rds")
saveRDS(upsetr.input.simple_string, file = "Paper/upsetr.input.simple_string.rds")
## Creating common plot - HALLMARKS ####
for(i in names(enrichobj)){
for(k in names(enrichobj[[i]])){
enrichobj[[i]][[k]]@result$Pop <- i
enrichobj[[i]][[k]]@result$Direction <- k
enrichobj[[i]][[k]]@result$ID <- gsub(x = enrichobj[[i]][[k]]@result$ID, pattern = "HALLMARK_", replacement = "")
enrichobj[[i]][[k]]@result$Description <- gsub(x = enrichobj[[i]][[k]]@result$Description, pattern = "HALLMARK_", replacement = "")
rownames(enrichobj[[i]][[k]]@result) <- gsub(x = rownames(enrichobj[[i]][[k]]@result), pattern = "HALLMARK_", replacement = "")
}
}
df <- data.frame()
for(i in names(enrichobj)){
for(k in names(enrichobj[[i]])){
df <- rbind.data.frame(df, enrichobj[[i]][[k]]@result)
}
}
df_binarized <- df
df_binarized$TileVal <- 0
df_binarized$TileVal[which(df_binarized$p.adjust < 0.05 & df_binarized$Direction == "UP")] <- 1
df_binarized$TileVal[which(df_binarized$p.adjust < 0.05 & df_binarized$Direction == "DW")] <- -1
library(reshape2)
df_binarized <- df_binarized[df_binarized$TileVal != 0,]
df_binarized_mt <- dcast(data = df_binarized, formula = ID ~ Pop + Direction, value.var = "TileVal")
df_binarized_mt[is.na(df_binarized_mt)] <- 0
rownames(df_binarized_mt) <- df_binarized_mt$ID
df_binarized_mt$ID <- NULL
png(filename = "Paper/Hallmarks_enrichments_pheatmap.png", width = 12, height = 6, units = "in", res = 300)
pheatmap(df_binarized_mt)
dev.off()
library(ggplot2)
png(filename = "Paper/Hallmarks_enrichments_tileplot.png", width = 12, height = 6, units = "in", res = 300)
ggplot(data = df_binarized, mapping = aes(x = Direction, y = ID, fill = TileVal)) + geom_tile() +
facet_grid(. ~ Pop, scales = "free", space = "free") +
scale_fill_distiller(palette = "RdBu", direction = -1)
dev.off()
saveRDS(object = df_binarized_mt, "Paper/Hallmarks_enrichments_pheatmap_data.rds")
saveRDS(object = df_binarized, "Paper/Hallmarks_enrichments_tileplot_data.rds")
library(enrichplot)
UP_PLOT <- cnetplot(enrichobj$shared$UP, showCategory = 20, cex_label_gene = 0.6,
cex_label_category = 1, color_category = "#E31A1C", ) +
ggtitle("Enrichment from UP genes") +
#ggtitle("Enriched hallmarks from UP genes across culturing") +
theme(plot.title = element_text(hjust = 0.5),
legend.position = "bottom")
DW_PLOT <- cnetplot(enrichobj$shared$DW, showCategory = 20, cex_label_gene = 0.6,
cex_label_category = 1, color_category = "#1F78B4") +
ggtitle("Enrichment from DW genes") +
#ggtitle("Enriched hallmarks from DW genes across culturing") +
theme(plot.title = element_text(hjust = 0.5),
legend.position = "bottom")
library(gridExtra)
png(filename = "Paper/Hallmark_enriched_terms_common_UP_DW_modulated_genes_across_populations.png",
width = 24, height = 12, units = "in", res = 300)
grid.arrange(UP_PLOT, DW_PLOT, ncol = 2, widths= c(0.5, 0.5))
dev.off()
png(filename = "Paper/Hallmark_enriched_terms_common_UP_modulated_genes_across_populations.png",
width = 12, height = 12, units = "in", res = 300)
UP_PLOT + theme(plot.title = element_blank())
dev.off()
png(filename = "Paper/Hallmark_enriched_terms_common_DW_modulated_genes_across_populations.png",
width = 12, height = 12, units = "in", res = 300)
DW_PLOT + theme(plot.title = element_blank())
dev.off()
###
png(filename = "Paper/Hallmark_enriched_terms_common_UP_DW_modulated_genes_across_populations_B.png",
width = 12, height = 9, units = "in", res = 300)
grid.arrange(UP_PLOT, DW_PLOT, ncol = 2, widths= c(0.65, 0.35))
dev.off()
pdf(file = "Paper/Hallmark_enriched_terms_common_UP_DW_modulated_genes_across_populations.pdf",
width = 12, height = 9)
gridExtra::grid.arrange(UP_PLOT, DW_PLOT, ncol = 2, widths= c(0.65, 0.35))
dev.off()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment