Commit 192b1ccd authored by Matteo Barcella's avatar Matteo Barcella
Browse files

Supplemental fig2 preprocessing scripts and figure plot

parent ecad0b3b
# Evaluating the effect of culturing across time
library(Seurat)
library(MAST)
library(reshape2)
library(ggplot2)
library(gridExtra)
library(pheatmap)
library(ComplexHeatmap)
library(RColorBrewer)
library(openxlsx)
library(clusterProfiler)
library(msigdbr)
library(enrichplot)
# load object
obj <- readRDS(file = "Fig2C_Dataset.rds")
obj <- SetIdent(obj, value = "orig.ident")
obj <- subset(obj, idents = c("C02","C03","C04"))
culture_markers <- list()
for(pop in unique(obj$Classification)){
dir.create(pop)
tmp_obj <- SetIdent(obj, value = "Classification")
tmp_obj <- subset(x = tmp_obj, idents = pop)
tmp_obj <- SetIdent(tmp_obj, value = "orig.ident")
culture_markers[[pop]] <- FindAllMarkers(object = tmp_obj, return.thresh = 1e-05,
random.seed = 300, test.use = "MAST",
min.cells.group = 5)
}
# pairwise style
culture_markers_paiwise <- list(C02vsC04 = list(),
C03vsC02 = list(),
C03vsC04 = list())
for(comp in names(culture_markers_paiwise)){
print(paste0("Running comparison: ", comp))
for(pop in unique(obj$Classification)){
print(paste0("Running population: ", pop))
tmp_obj <- SetIdent(obj, value = "Classification")
tmp_obj <- subset(x = tmp_obj, idents = pop)
id1 <- strsplit(comp,split = "vs")[[1]][1]
id2 <- strsplit(comp,split = "vs")[[1]][2]
# check number of cells (if less than 5 skip population)
id1_l <- length(which(tmp_obj$orig.ident == id1))
id2_l <- length(which(tmp_obj$orig.ident == id2))
print(paste0("Group 1 n°cells: ", id1_l, "; Group 2 n°cells: ", id2_l))
if(id1_l < 5 | id2_l < 5){
print("Skipping comparison ....")
next
}else{
tmp_obj <- SetIdent(tmp_obj, value = "orig.ident")
culture_markers_paiwise[[comp]][[pop]] <- FindMarkers(object = tmp_obj,
ident.1 = id1,
ident.2 = id2,
random.seed = 300,
test.use = "MAST",
logfc.threshold = 0, # for reporting logFC even if not significant
min.cells.group = 5)
}
}
}
# Markers directory
marker_dir <- "Supplemental_Fig2/"
dir.create(marker_dir)
saveRDS(culture_markers, file = paste0(marker_dir,"culture_markers_Classification.rds"))
saveRDS(culture_markers_paiwise, file = paste0(marker_dir,"culture_markers_paiwise_Classification.rds"))
write.xlsx(x = culture_markers, file = paste0(marker_dir, "culture_markers_Classification.xlsx"))
for(i in names(culture_markers_paiwise)){
write.xlsx(x = culture_markers_paiwise[[i]], file = paste0(marker_dir, "culture_markers_Classification_",i,".xlsx"))
}
### TimeCourse - like analysis (identify patterns) ###
genes2evaluate <- list()
for(pop in unique(obj$Classification)){
Z <- culture_markers[[pop]]
findall <- length(unique(Z[Z$p_val_adj < 0.01,]$gene))
A1 <- culture_markers_paiwise$C02vsC04[[pop]]
B1 <- culture_markers_paiwise$C03vsC02[[pop]]
C1 <- culture_markers_paiwise$C03vsC04[[pop]]
# only significative with logFC > 0.25
agenes <- NULL
bgenes <- NULL
cgenes <- NULL
if(!is.null(nrow(A1))){
A <- A1[A1$p_val_adj < 0.01 & abs(A1$avg_log2FC) > 0.25,]
agenes <- rownames(A)
}else{
print("no diff genes in C02vsC04 !")
}
if(!is.null(nrow(B1))){
B <- B1[B1$p_val_adj < 0.01 & abs(B1$avg_log2FC) > 0.25,]
bgenes <- rownames(B)
}else{
print("no diff genes in C03vsC02 !")
}
if(!is.null(nrow(C1))){
C <- C1[C1$p_val_adj < 0.01 & abs(C1$avg_log2FC) > 0.25,]
cgenes <- rownames(C)
}else{
print("no diff genes in C03vsC04 !")
}
# numero geni totali derivanti da confronti pairwise
pairwiseunion.num <- length(unique(c(agenes,
bgenes,
cgenes)
)
)
# number of common genes between global test and (union of all pair-wise comparisons)
common <- length(base::intersect(x = unique(Z[Z$p_val_adj < 0.01,]$gene),
y = unique(c(agenes,
bgenes,
cgenes
))
))
print(paste0("Population ", pop,
" fulltest: ", findall,
"; pairwise union: ", pairwiseunion.num,
"; shared: ", common))
tmpintersection <- base::intersect(x = unique(Z[Z$p_val_adj < 0.01,]$gene),
y = unique(c(agenes,
bgenes,
cgenes
))
)
if(!is.null(tmpintersection)){
genes2evaluate[[pop]] <- tmpintersection
}else{
next
}
}
for(i in names(genes2evaluate)){
writeLines(text = genes2evaluate[[i]], con = paste0(marker_dir, "Common_markers_complete-pairwise_Classification_",i,".txt"))
}
for(pop in names(genes2evaluate)){
mydata <- data.frame(row.names = genes2evaluate[[pop]])
A1 <- culture_markers_paiwise$C02vsC04[[pop]]
B1 <- culture_markers_paiwise$C03vsC02[[pop]]
C1 <- culture_markers_paiwise$C03vsC04[[pop]]
agenes <- NULL
bgenes <- NULL
cgenes <- NULL
if(!is.null(nrow(A1))){
A <- A1[A1$p_val_adj < 0.01 & abs(A1$avg_log2FC) > 0.25,]
agenes <- rownames(A)
}else{
cat("no diff genes in C02vsC04 !")
}
if(!is.null(nrow(B1))){
B <- B1[B1$p_val_adj < 0.01 & abs(B1$avg_log2FC) > 0.25,]
bgenes <- rownames(B)
}else{
cat("no diff genes in C03vsC02 !")
}
if(!is.null(nrow(C1))){
C <- C1[C1$p_val_adj < 0.01 & abs(C1$avg_log2FC) > 0.25,]
cgenes <- rownames(C)
}else{
cat("no diff genes in C03vsC04 !")
}
##### Report only genes that are significantly different and with logFC > 0.25 ######
index1 <- which(rownames(A1) %in% intersect(agenes,genes2evaluate[[pop]]))
index2 <- which(rownames(B1) %in% intersect(bgenes,genes2evaluate[[pop]]))
index3 <- which(rownames(C1) %in% intersect(cgenes,genes2evaluate[[pop]]))
if(length(index1) != 0){
mydata <- merge.data.frame(mydata, A1[index1,"avg_log2FC",drop = F], by.x = 0, by.y = 0, all.x = T, sort = F)
rownames(mydata) <- mydata$Row.names
mydata$Row.names <- NULL
base::colnames(mydata)[1] <- "C02vsC04"
}else{
mydata[["C02vsC04"]] <- NA
}
if(length(index2) != 0){
mydata <- merge.data.frame(mydata, B1[index2,"avg_log2FC",drop = F], by.x = 0, by.y = 0, all.x = T, sort = F)
rownames(mydata) <- mydata$Row.names
mydata$Row.names <- NULL
base::colnames(mydata)[2] <- "C03vsC02"
}else{
mydata[["C03vsC02"]] <- NA
}
if(length(index3) != 0){
mydata <- merge.data.frame(mydata, C1[index3,"avg_log2FC",drop = F], by.x = 0, by.y = 0, all.x = T, sort = F)
rownames(mydata) <- mydata$Row.names
mydata$Row.names <- NULL
base::colnames(mydata)[3] <- "C03vsC04"
}else{
mydata[["C03vsC04"]] <- NA
}
#### using C4 as ref point - starting point
mydata_ref_C04 <- data.frame(row.names = genes2evaluate[[pop]])
mydata_ref_C04$C04 <- 0
mydata_ref_C04$C02 <- 0
mydata_ref_C04$C03 <- 0
mydata_ref_C04[rownames(mydata),"C02"] <- mydata[rownames(mydata),"C02vsC04"]
mydata_ref_C04[rownames(mydata),"C03"] <- mydata[rownames(mydata),"C03vsC04"]
saveRDS(object = mydata, file =paste0(pop,"/Pairwise_tests_significant_Classification_", pop ,".rds"))
saveRDS(object = mydata_ref_C04, file = paste0(pop,"/Pairwise_tests_significant_Classification_", pop ,"_C04_baseline.rds"))
}
###### Binarized mydata #######
expr_matrix <- list()
for(pop in names(genes2evaluate)){
#for(pop in c("10","11")){
mydata <- readRDS(file = paste0(pop,"/Pairwise_tests_significant_Classification_", pop ,".rds"))
mydata_bin <- mydata
mydata_bin$Pattern <- "ND"
mydata_bin$PatternLight <- "ND"
for(i in 1:nrow(mydata)){
gene <- rownames(mydata_bin)[i]
#print(paste0("row ", i, " gene: ", gene))
# Starting with patterns including NAs from left to right (first to third comparison)
# Pattern: nsns(UP)
if(is.na(mydata[gene,1]) & is.na(mydata[gene,2]) & mydata[gene,3] > 0){
mydata_bin[gene,] <- c(0,0,0.5,"nsns(UP)","nsnsUP")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ", paste0(0,0,0.5,"nsns(UP)","nsnsUP", sep = "_")))
next
}
# Pattern: nsns(DW)
if(is.na(mydata[gene,1]) & is.na(mydata[gene,2]) & mydata[gene,3] < 0){
mydata_bin[gene,] <- c(0,0,-0.5,"nsns(DW)","nsnsDW")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,0,-0.5,"nsns(DW)","nsnsDW", sep = "_")))
next
}
# Pattern: nsDW(ns)
if(is.na(mydata[gene,1]) & mydata[gene,2] < 0 & is.na(mydata[gene,3])){
mydata_bin[gene,] <- c(0,0,-1,"nsDW(ns)","nsDW")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,0,-1,"nsDW(ns)","nsDW", sep = "_")))
next
}
# Pattern: nsDW(DW)
if(is.na(mydata[gene,1]) & mydata[gene,2] < 0 & mydata[gene,3] < 0){
mydata_bin[gene,] <- c(0,0,-1,"nsDW(DW)","nsDW")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,0,-1,"nsDW(DW)","nsDW", sep = "_")))
next
}
# Pattern: nsUP(ns)
if(is.na(mydata[gene,1]) & mydata[gene,2] > 0 & is.na(mydata[gene,3])){
mydata_bin[gene,] <- c(0,0,1,"nsUP(ns)","nsUP")
print(paste0(pop, ": row ", i, " gene: ", gene," pattern: ", paste0(0,0,1,"nsUP(ns)","nsUP", sep = "_")))
next
}
# Pattern: nsUP(UP)
if(is.na(mydata[gene,1]) & mydata[gene,2] > 0 & mydata[gene,3] > 0){
mydata_bin[gene,] <- c(0,0,1,"nsUP(UP)","nsUP")
print(paste0(pop, ": row ", i, " gene: ", gene," pattern: ", paste0(0,0,1,"nsUP(UP)","nsUP", sep = "_")))
next
}
#### Condition with NA at second column
# Pattern: UPns(ns)
if(mydata[gene,1] > 0 & is.na(mydata[gene,2]) & is.na(mydata[gene,3])){
mydata_bin[gene,] <- c(0,1,1,"UPns(ns)","UPns")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,1,1,"UPns(ns)","UPns", sep = "_")))
next
}
# Pattern: UPns(UP)
if(mydata[gene,1] > 0 & is.na(mydata[gene,2]) & mydata[gene,3] > 0){
mydata_bin[gene,] <- c(0,1,1,"UPns(UP)","UPns")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,1,1,"UPns(UP)","UPns", sep = "_")))
next
}
# Pattern: DWns(ns)
if(mydata[gene,1] < 0 & is.na(mydata[gene,2]) & is.na(mydata[gene,3])){
mydata_bin[gene,] <- c(0,-1,-1,"DWns(ns)","DWns") # cluster 13
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,-1,"DWns(ns)","DWns", sep = "_")))
next
}
# Pattern: DWns(DW)
if(mydata[gene,1] < 0 & is.na(mydata[gene,2]) & mydata[gene,3] < 0){
mydata_bin[gene,] <- c(0,-1,-1,"DWns(DW)","DWns") # cluster 14
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,-1,"DWns(DW)","DWns", sep = "_")))
next
}
# Pattern: DWns(UP)
if(mydata[gene,1] < 0 & is.na(mydata[gene,2]) & mydata[gene,3] > 0){
mydata_bin[gene,] <- c(0,-1,-1,"DWns(UP)","DWns") # cluster 14
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,-1,"DWns(UP)","DWns", sep = "_")))
next
}
### NA at 3rd column ###
# Pattern: DWUP(ns)
if(mydata[gene,1] < 0 & mydata[gene,2] > 0 & is.na(mydata[gene,3])){
mydata_bin[gene,] <- c(0,-1,0,"DWUP(ns)","DWUP") # cluster 17
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,0,"DWUP(ns)","DWUP", sep = "_")))
next
}
# Pattern: UPDW(ns)
if(mydata[gene,1] > 0 & mydata[gene,2] < 0 & is.na(mydata[gene,3])){
mydata_bin[gene,] <- c(0,1,0,"UPDW(ns)","UPDW")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,1,0,"UPDW(ns)","UPDW", sep = "_")))
next
}
# Pattern: UPUP(UP)
if(mydata[gene,1] > 0 & mydata[gene,2] > 0 & mydata[gene,3] > 0){
mydata_bin[gene,] <- c(0,1,2,"UPUP(UP)","UPUP")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,1,2,"UPUP(UP)","UPUP", sep = "_")))
next
}
# Pattern: UPDW(UP)
if(mydata[gene,1] > 0 & mydata[gene,2] < 0 & mydata[gene,3] > 0){
mydata_bin[gene,] <- c(0,1,0.5,"UPDW(UP)","UPDW")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,1,0.5,"UPDW(UP)","UPDW", sep = "_")))
next
}
# Pattern: UPDW(DW)
if(mydata[gene,1] > 0 & mydata[gene,2] < 0 & mydata[gene,3] < 0){
mydata_bin[gene,] <- c(0,1,-0.5,"UPDW(DW)","UPDW")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,1,-0.5,"UPDW(DW)","UPDW", sep = "_")))
next
}
# Pattern: DWUP(UP)
if(mydata[gene,1] < 0 & mydata[gene,2] > 0 & mydata[gene,3] > 0){
mydata_bin[gene,] <- c(0,-1,0.5,"DWUP(UP)","DWUP")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,0.5,"DWUP(UP)","DWUP", sep = "_")))
next
}
# Pattern: DWUP(DW)
if(mydata[gene,1] < 0 & mydata[gene,2] > 0 & mydata[gene,3] < 0){
mydata_bin[gene,] <- c(0,-1,-0.5,"DWUP(DW)","DWUP")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,-0.5,"DWUP(DW)","DWUP", sep = "_")))
next
}
# Pattern: DWDW(DW)
if(mydata[gene,1] < 0 & mydata[gene,2] < 0 & mydata[gene,3] < 0){
mydata_bin[gene,] <- c(0,-1,-2,"DWDW(DW)","DWDW")
print(paste0(pop, ": row ", i, " gene: ", gene, " pattern: ",paste0(0,-1,-2,"DWDW(DW)","DWDW", sep = "_")))
next
}
}
saveRDS(object = mydata_bin, file = paste0(pop,"/Pairwise_tests_significant_Classification_",pop,"_binary.rds"))
mydata_bin_df <- as.data.frame(mydata_bin)
mydata_bin_df$gene <- rownames(mydata_bin_df)
colnames(mydata_bin_df) <- c("C04","C02","C03","Pattern","PatternLight","gene")
m <- melt(mydata_bin_df, measure.vars = c("C04","C02","C03"), id.vars = c("gene","Pattern","PatternLight"))
m$value <- as.numeric(m$value)
mytheme <- gridExtra::ttheme_default(
core = list(fg_params=list(cex = 0.5)),
colhead = list(fg_params=list(cex = 0.5)),
rowhead = list(fg_params=list(cex = 0.5)))
m_cast <- dcast(m,formula = gene + Pattern ~ variable, value.var = 'value')
annotation_table <- as.data.frame(table(m_cast$Pattern))
colnames(annotation_table) <- c("Pattern","Ngenes")
saveRDS(m, paste0(pop, "/Patterns_dataframe_Classification_",pop,".rds"))
saveRDS(m_cast, paste0(pop, "/Patterns_dataframe_casted_Classification_",pop,".rds"))
saveRDS(annotation_table, paste0(pop,"/Annotation_table_Classification_",pop,".rds"))
tmpdf <- as.data.frame(table(m$Pattern))
new_labels <- paste0(tmpdf$Var1, "(", tmpdf$Freq, ")")
names(new_labels) <- tmpdf$Var1
png(paste0(pop, "/",pop, "_patterns_Classification.png"), width = 6, height = 6, units = "in", res = 300)
print(ggplot(m, mapping = aes(x = variable, y = value, color = Pattern, group = gene)) + geom_line() + #, xmin=-12, xmax=-7, ymin=-12, ymax=-7) +
facet_wrap( . ~ Pattern, ncol = 4, labeller = labeller(Pattern = new_labels)
) +
ggtitle(label = paste0("Culture effect - ", pop)) +
theme(plot.title = element_text(hjust = 0.5)))
dev.off()
# lightpattern
m_cast <- dcast(m,formula = gene + PatternLight ~ variable, value.var = 'value')
annotation_table <- as.data.frame(table(m_cast$PatternLight))
colnames(annotation_table) <- c("PatternLight","Ngenes")
saveRDS(m, paste0(pop, "/PatternsLight_dataframe_Classification_",pop,".rds"))
saveRDS(m_cast, paste0(pop, "/PatternsLight_dataframe_casted_Classification_",pop,".rds"))
saveRDS(annotation_table, paste0(pop,"/Annotation_tableLight_Classification_",pop,".rds"))
tmpdf <- as.data.frame(table(m$PatternLight))
new_labels <- paste0(tmpdf$Var1, "(", tmpdf$Freq, ")")
names(new_labels) <- tmpdf$Var1
png(paste0(pop, "/",pop, "_patternsLight_Classification.png"), width = 6, height = 6, units = "in", res = 300)
print(ggplot(m, mapping = aes(x = variable, y = value, color = PatternLight, group = gene)) + geom_line() + #, xmin=-12, xmax=-7, ymin=-12, ymax=-7) +
facet_wrap( . ~ PatternLight, ncol = 4, labeller = labeller(PatternLight = new_labels)
) +
ggtitle(label = paste0("Culture effect - ", pop)) +
theme(plot.title = element_text(hjust = 0.5)))
dev.off()
genes2evaluate[[pop]] <- readLines(con = paste0(marker_dir,"/Common_markers_complete-pairwise_Classification_",pop,".txt"))
expr_matrix[[pop]] <- as.data.frame(AverageExpression(object = subset(obj, cells = WhichCells(object = obj, expression = Classification == pop)),
features = genes2evaluate[[pop]], group.by = "orig.ident")$RNA)
saveRDS(expr_matrix[[pop]], paste0(pop, "/Expression_matrix_Classification_",pop,".rds"))
}
write.xlsx(x = expr_matrix, file = "Expression_matrices_modulated_genes_Classification.xlsx", asTable = T, overwrite = T,
rowNames = T, firstRow = T)
###### heatmaps average expression ######
### with light patterns ###
Patterns_dataframe_casted <- list()
for(pop in names(genes2evaluate)){
Patterns_dataframe_casted[[pop]] <- readRDS(paste0(pop, "/PatternsLight_dataframe_casted_Classification_",pop,".rds"))
}
for(pop in names(genes2evaluate)){
#for(pop in c("0")){
outdir <- paste0(pop,"/")
dir.create(outdir, showWarnings = F)
for(i in unique(Patterns_dataframe_casted[[pop]]$PatternLight)){
ttt <- Patterns_dataframe_casted[[pop]]
goi <- ttt[which(ttt$PatternLight == i),]$gene
if(length(goi) < 3){
next
}
cur.mat <- expr_matrix[[pop]][goi,]
cur.mat <- subset(cur.mat, select = c("C04","C02","C03"))
cur.mat <- as.matrix(cur.mat)
pheatmap::pheatmap(mat = cur.mat,
scale = "none",cluster_cols = F,
display_numbers = T,
main = paste0("Avg Expression Pattern ",i),
filename = paste0(outdir,pop,"_pattern_",i,".pdf"))
}
d <- Patterns_dataframe_casted[[pop]][,c("gene","PatternLight")]
rownames(d) <- d$gene
colnames(d)[2] <- "Pattern"
d$gene <- NULL
d$Pattern <- factor(x = d$Pattern, levels = sort(unique(d$Pattern)))
mycolors <- brewer.pal(n = length(unique(d$Pattern)), name = "Paired")
names(mycolors) <- unique(d$Pattern)
mycolors <- list(cluster = mycolors)
fullmat <- as.matrix(expr_matrix[[pop]][ order(row.names(expr_matrix[[pop]])), ])
pheatmap::pheatmap(fullmat, scale = "row",
show_rownames = F, annotation_row = d,
annotation_colors = mycolors,
filename = paste0(outdir,pop,"_all_clusters.pdf"))
}
enrichmentdir <- "enrichment/"
dir.create(enrichmentdir)
m_t2g <- read.gmt(gmtfile = "msigdb.v2023.1.Hs.symbols.gmt")
#### Perform enrichment #####
for(pop in names(genes2evaluate)){
tmp_obj <- SetIdent(object = obj, value = "Classification")
tmp_obj <- subset(tmp_obj, idents = pop)
myuni <- names(which(rowSums(tmp_obj@assays$RNA@counts) > 0)) # universe with genes expressed in at least 1 cell in current population
writeLines(text = myuni, con = paste0(enrichmentdir,pop, "_universe.txt"))
enrichobj <- list()
for(clus in levels(Patterns_dataframe_casted[[pop]]$PatternLight)){
enrichobj[[paste0("cl_",clus)]] <- enricher(Patterns_dataframe_casted[[pop]][which(Patterns_dataframe_casted[[pop]]$PatternLight == clus),"gene"],
TERM2GENE=m_t2g,
universe = myuni, pvalueCutoff = 0.05)
}
###### MultiComparison ######
genesets_clusters <- list()
for(cl in base::unique(Patterns_dataframe_casted[[pop]]$PatternLight)){
genesets_clusters[[paste0("cl_",cl)]] <- Patterns_dataframe_casted[[pop]][which(Patterns_dataframe_casted[[pop]]$PatternLight == cl),"gene"]
}
xx <- list()
for(ontocat in c("MF","BP","CC")){
xx[[ontocat]] <- compareCluster(genesets_clusters, fun="enrichGO", pvalueCutoff=0.05,
OrgDb='org.Hs.eg.db', keyType = "SYMBOL", universe = myuni,
ont = ontocat)
xx[[ontocat]] <- pairwise_termsim(xx[[ontocat]])
pdf(file = paste0(enrichmentdir,pop,"_EnrichGO_",ontocat,".pdf"), width = 16, height = 12, paper = "a4r")
goplot <- emapplot(xx[[ontocat]], pie="count", cex_category=1, cex_line = 0.4, layout="kk")
print(goplot)
dev.off()
saveRDS(object = xx[[ontocat]] , file = paste0(enrichmentdir, pop, "_enrichGO_multi_", ontocat, ".rds"))
}
saveRDS(object = genesets_clusters, file = paste0(enrichmentdir,pop, "_genesets_clusters_list.rds"))
saveRDS(object = enrichobj, file = paste0(enrichmentdir,pop, "_enrichobj_msigdb.rds"))
}
# Identify the a shared and common effect of the culturing in celltypes as well as cell-specific modulations.
library(Seurat)
library(MAST)
library(reshape2)
library(ggplot2)
library(gridExtra)
library(pheatmap)
library(ComplexHeatmap)
library(RColorBrewer)
library(openxlsx)
library(clusterProfiler)
library(msigdbr)
library(enrichplot)
rootdir <- "CultureEffectClassification/"
# Step 1. Identify classes of genes that fall into the different patterns ####
# Pattern UP: UPUP, nsUP, UPns [consistently UP, late UP response, early UP response]
# Pattern DW: DWDW, nsDW, DWns [consistently DW, late DW response, early DW response]
# Transient UP response: UPDW
# Transient DW response: DWUP
# cicle over populations and store according to categories above #
populations <- c("EryProg","GMP","HSC","HSPC","MCP",
"MEP-cycling_I","MEP-cycling_I",
"MEP-EryProg-I", "MEP-EryProg-II",
"MkProg","MLP","MonoProg",
"Erythrocytes","Prog-cycling_I","Prog-cycling_II"#,"Ribo+"
)
dframe_X_classes <- list()
genes_X_classes <- list()
for(popcell in populations){
mydata <- readRDS(paste0(rootdir,popcell,"/Pairwise_tests_significant_Classification_",popcell,"_binary.rds"))
mydata$Class <- "undetermined"
mydata$Class[which(mydata$PatternLight %in% c("UPUP","nsUP","UPns","nsnsUP"))] <- "UP"
mydata$Class[which(mydata$PatternLight %in% c("DWDW","nsDW","DWns","nsnsDW"))] <- "DW"
mydata$Class[which(mydata$PatternLight %in% c("UPDW"))] <- "trUP"
mydata$Class[which(mydata$PatternLight %in% c("DWUP"))] <- "trDW"
dframe_X_classes[[popcell]] <- split(x = mydata, f = mydata$Class)
for(j in names(dframe_X_classes[[popcell]])){
genes_X_classes[[popcell]][[j]] <- rownames(dframe_X_classes[[popcell]][[j]])
}
}
# do upsetR on specific patterns for all ####
library(UpSetR)
class_X_pop <- list()
for(popcell in populations){
for(k in names(genes_X_classes[[popcell]])){
class_X_pop[[k]][[popcell]] <- genes_X_classes[[popcell]][[k]]
}
}
# create upset input and create chart ####
PaperDir <- paste0(rootdir, "Paper/")
dir.create(PaperDir)
upsetr.input <- list()
upsetr.input_string <- list()
for(j in names(class_X_pop)){
upsetr.input[[j]] <- fromList(class_X_pop[[j]])
rownames(upsetr.input[[j]]) <- unique(unlist(class_X_pop[[j]]))
pdf(file = paste0(PaperDir, "Pattern_",j,"_UpSetR_by_Populations.pdf"), width = 18, height = 6)
print(upset(upsetr.input[[j]] , order.by = "freq", sets = rev(colnames(upsetr.input[[j]])), keep.order = TRUE, nintersects = NA))
dev.off()
}
class_X_pop_symplified <- list(DW = NULL, UP = NULL,
trUP = NULL, trDW = NULL)
for(k in names(class_X_pop_symplified)){
class_X_pop_symplified[[k]]$MyeLineage <- c(class_X_pop[[k]]$GMP)
class_X_pop_symplified[[k]]$MonoProg <- class_X_pop[[k]]$MonoProg
class_X_pop_symplified[[k]]$MCP <- class_X_pop[[k]]$MCP
class_X_pop_symplified[[k]]$MkEryLineage <- c(class_X_pop[[k]]$EryProg, class_X_pop[[k]]$`MEP-cycling_I`,
class_X_pop[[k]]$`MEP-EryProg-I`, class_X_pop[[k]]$`MEP-EryProg-II`,
class_X_pop[[k]]$MkProg, class_X_pop[[k]]$Erythrocytes)
class_X_pop_symplified[[k]]$ProgCycling <- c(class_X_pop[[k]]$`Prog-cycling_I`,
class_X_pop[[k]]$`Prog-cycling_II`)
class_X_pop_symplified[[k]]$HSC <- c(class_X_pop[[k]]$HSC, class_X_pop[[k]]$MLP)
class_X_pop_symplified[[k]]$HSPC <- c(class_X_pop[[k]]$HSPC)
}
upsetr.input.simple <- list()
upsetr.input.simple_string <- list()
colpalette <- c("#A6CEE3","#B2DF8A","#FDBF6F","#E78AC3","#FFFF99","#E31A1C","darkgrey")
Labelset <- c("HSC","HSPC","MonoProg","MyeLineage","MCP","MkEryLineage","ProgCycling")
names(Labelset) <- colpalette
for(j in names(class_X_pop_symplified)){
upsetr.input.simple[[j]] <- fromList(class_X_pop_symplified[[j]])
rownames(upsetr.input.simple[[j]]) <- unique(unlist(class_X_pop_symplified[[j]]))
myset <- Labelset[Labelset %in% colnames(upsetr.input.simple[[j]])]
pdf(file = paste0(PaperDir, "Pattern_",j,"_UpSetR_by_Populations_simply.pdf"), width = 18, height = 6)
print(upset(upsetr.input.simple[[j]] , order.by = "freq", sets.bar.color = names(myset),
sets = myset,
keep.order = TRUE, nintersects = NA))
dev.off()
}
### Which are the common pathways that are modulated across celltypes upon culturing #####
for(j in c("DW","UP")){
tmpdata <- upsetr.input.simple[[j]]
tmpdata_shared <- tmpdata[rowSums(tmpdata) > 3,]
pattern_data <- apply( tmpdata[ , colnames(tmpdata) ] , 1 ,
paste , collapse = "")
upsetr.input.simple_string[[j]][["shared"]] <- pattern_data[rownames(tmpdata_shared)]
for (k in colnames(tmpdata)) {
test <- "0000000"
substr(x = test , start = which(colnames(tmpdata) == k), stop = which(colnames(tmpdata) == k)) <- "1"
print(test)
upsetr.input.simple_string[[j]][[k]] <- pattern_data[pattern_data == test]
}
### detailed version
tmpdata <- upsetr.input[[j]]
tmpdata_shared <- tmpdata[rowSums(tmpdata) > 9,]
pattern_data <- apply( tmpdata[ , colnames(tmpdata) ] , 1 ,
paste , collapse = "")
upsetr.input_string[[j]][["shared"]] <- pattern_data[rownames(tmpdata_shared)]
for (k in colnames(tmpdata)) {
test <- "00000000000000"
substr(x = test , start = which(colnames(tmpdata) == k), stop = which(colnames(tmpdata) == k)) <- "1"
print(test)
upsetr.input_string[[j]][[k]] <- pattern_data[pattern_data == test]
}
}
#### Perform enrichment #####
# create universes for combined simple labels ####
universes <- list()
universes_symplified <- list()
for(m in colnames(upsetr.input$DW)){
universes[[m]] <- readLines(paste0(rootdir, "enrichment/",m,"_universe.txt"))
}
universes_symplified$MyeLineage <- unique(universes$GMP)
universes_symplified$MonoProg <- unique(universes$MonoProg)
universes_symplified$MkEryLineage <- unique(c(universes$EryProg, universes$`MEP-cycling_I`,
universes$`MEP-EryProg-I`, universes$`MEP-EryProg-II`,
universes$MkProg, universes$Erythrocytes))
universes_symplified$ProgCycling <- unique(c(universes$`Prog-cycling_I`,
universes$`Prog-cycling_II`))
universes_symplified$HSC <- unique(c(universes$HSC, universes$MLP))
universes_symplified$HSPC <- unique(c(universes$HSPC))
universes_symplified$MCP <- unique(c(universes$MCP))
universes_symplified$shared <- unique(unlist(universes_symplified))
universes$shared <- unique(unlist(universes))
# Perform enrichment #####
enrichmentdir <- "PatternsEnrichments/"
dir.create(enrichmentdir)
m_t2g <- read.gmt(gmtfile = "h.all.v2023.1.Hs.symbols.gmt")
#### Perform enrichment #####
enrichobj <- list()
upsetr.input.simple_names <- list()
for(j in names(upsetr.input.simple_string)){
for(i in names(upsetr.input.simple_string[[j]])){
upsetr.input.simple_names[[j]][[i]] <- names(upsetr.input.simple_string[[j]][[i]])
}
}
for(a in c("DW","UP")){
xx <- compareCluster(upsetr.input.simple_names[[a]], fun="enrichGO", pvalueCutoff=0.05,
OrgDb='org.Hs.eg.db', keyType = "SYMBOL", universe = universes_symplified[["shared"]],
ont = "BP")
saveRDS(xx, file = paste0(enrichmentdir, "Pattern_enrichment_simplified_BP_",a,".rds"))
# if(nrow(xx@compareClusterResult) < 2){
# next
# }
xx <- pairwise_termsim(xx)
pdf(file = paste0(enrichmentdir, "Pattern_enrichment_simplified_BP_",a,".pdf"), width = 16, height = 12, paper = "a4r")
goplot <- emapplot(xx, pie="count", cex_category=1, cex_line = 0.4, layout="kk")
print(goplot)
dev.off()
for(j in names(universes_symplified)){
enrichobj[[j]][[a]] <- enricher(names(upsetr.input.simple_string[[a]][[j]]), TERM2GENE=m_t2g,
universe = universes_symplified[[j]], pvalueCutoff = 0.05)
}
}
saveRDS(enrichobj,"Paper/Hallmarks_enrichments_patternSimple_populations.rds")
saveRDS(upsetr.input_string, file = "Paper/upsetr.input_string.rds")
saveRDS(upsetr.input.simple_string, file = "Paper/upsetr.input.simple_string.rds")
## Creating common plot - HALLMARKS ####
for(i in names(enrichobj)){
for(k in names(enrichobj[[i]])){
enrichobj[[i]][[k]]@result$Pop <- i
enrichobj[[i]][[k]]@result$Direction <- k
enrichobj[[i]][[k]]@result$ID <- gsub(x = enrichobj[[i]][[k]]@result$ID, pattern = "HALLMARK_", replacement = "")
enrichobj[[i]][[k]]@result$Description <- gsub(x = enrichobj[[i]][[k]]@result$Description, pattern = "HALLMARK_", replacement = "")
rownames(enrichobj[[i]][[k]]@result) <- gsub(x = rownames(enrichobj[[i]][[k]]@result), pattern = "HALLMARK_", replacement = "")
}
}
df <- data.frame()
for(i in names(enrichobj)){
for(k in names(enrichobj[[i]])){
df <- rbind.data.frame(df, enrichobj[[i]][[k]]@result)
}
}
df_binarized <- df
df_binarized$TileVal <- 0
df_binarized$TileVal[which(df_binarized$p.adjust < 0.05 & df_binarized$Direction == "UP")] <- 1
df_binarized$TileVal[which(df_binarized$p.adjust < 0.05 & df_binarized$Direction == "DW")] <- -1
library(reshape2)
df_binarized <- df_binarized[df_binarized$TileVal != 0,]
df_binarized_mt <- dcast(data = df_binarized, formula = ID ~ Pop + Direction, value.var = "TileVal")
df_binarized_mt[is.na(df_binarized_mt)] <- 0
rownames(df_binarized_mt) <- df_binarized_mt$ID
df_binarized_mt$ID <- NULL
png(filename = "Paper/Hallmarks_enrichments_pheatmap.png", width = 12, height = 6, units = "in", res = 300)
pheatmap(df_binarized_mt)
dev.off()
library(ggplot2)
png(filename = "Paper/Hallmarks_enrichments_tileplot.png", width = 12, height = 6, units = "in", res = 300)
ggplot(data = df_binarized, mapping = aes(x = Direction, y = ID, fill = TileVal)) + geom_tile() +
facet_grid(. ~ Pop, scales = "free", space = "free") +
scale_fill_distiller(palette = "RdBu", direction = -1)
dev.off()
saveRDS(object = df_binarized_mt, "Paper/Hallmarks_enrichments_pheatmap_data.rds")
saveRDS(object = df_binarized, "Paper/Hallmarks_enrichments_tileplot_data.rds")
library(enrichplot)
UP_PLOT <- cnetplot(enrichobj$shared$UP, showCategory = 20, cex_label_gene = 0.6,
cex_label_category = 1, color_category = "#E31A1C", ) +
ggtitle("Enrichment from UP genes") +
#ggtitle("Enriched hallmarks from UP genes across culturing") +
theme(plot.title = element_text(hjust = 0.5),
legend.position = "bottom")
DW_PLOT <- cnetplot(enrichobj$shared$DW, showCategory = 20, cex_label_gene = 0.6,
cex_label_category = 1, color_category = "#1F78B4") +
ggtitle("Enrichment from DW genes") +
#ggtitle("Enriched hallmarks from DW genes across culturing") +
theme(plot.title = element_text(hjust = 0.5),
legend.position = "bottom")
library(gridExtra)
png(filename = "Paper/Hallmark_enriched_terms_common_UP_DW_modulated_genes_across_populations.png",
width = 24, height = 12, units = "in", res = 300)
grid.arrange(UP_PLOT, DW_PLOT, ncol = 2, widths= c(0.5, 0.5))
dev.off()
png(filename = "Paper/Hallmark_enriched_terms_common_UP_modulated_genes_across_populations.png",
width = 12, height = 12, units = "in", res = 300)
UP_PLOT + theme(plot.title = element_blank())
dev.off()
png(filename = "Paper/Hallmark_enriched_terms_common_DW_modulated_genes_across_populations.png",
width = 12, height = 12, units = "in", res = 300)
DW_PLOT + theme(plot.title = element_blank())
dev.off()
###
png(filename = "Paper/Hallmark_enriched_terms_common_UP_DW_modulated_genes_across_populations_B.png",
width = 12, height = 9, units = "in", res = 300)
grid.arrange(UP_PLOT, DW_PLOT, ncol = 2, widths= c(0.65, 0.35))
dev.off()
pdf(file = "Paper/Hallmark_enriched_terms_common_UP_DW_modulated_genes_across_populations.pdf",
width = 12, height = 9)
gridExtra::grid.arrange(UP_PLOT, DW_PLOT, ncol = 2, widths= c(0.65, 0.35))
dev.off()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment