diff --git a/Figures_scRNA_seq.R b/Figures_scRNA_seq.R index 83ac1cff951f9cce0c5f6b294d2ee10ee97e0fc7..e5332a96d1b683d6dcd67f72a0e2fdced402f956 100644 --- a/Figures_scRNA_seq.R +++ b/Figures_scRNA_seq.R @@ -9,17 +9,27 @@ library(RColorBrewer) library(scales) library(grid) library(gridExtra) +library(ggpubr) +#devtools::install_github("psyteachr/introdataviz") +library('introdataviz') ## full data all the condition -readRDS('full_csc_2023.rds') +full_obj <- readRDS('/DATA_lustre/DATA/HSC_GeneTherapy/Paper_Ana_GE_2023/full_csc_2023.rds') ## senescence lists -gmt.obj <- read.gmt('senescence_list_paper.gmt') -gmt.obj$term <- factor(gmt.obj$term) -table(gmt.obj$term) +gmt.obj_s <- read.gmt('senescence_list_paper.gmt') +gmt.obj_s$term <- factor(gmt.obj$term) +gmt.obj_s <- gmt.obj_s[gmt.obj_s$term %in% c('FRIDMAN_SENESCENCE_UP', 'SASP_SCHLEICH') +table(gmt.obj_s$term) -ll <- unique(gmt.obj$term) +HALLMARK_TNFA_SIGNALING_VIA_NFKB = c('ABCA1','ACKR3','AREG','ATF3','ATP2B1','B4GALT1','B4GALT5','BCL2A1','BCL3','BCL6','BHLHE40','BIRC2','BIRC3','BMP2','BTG1','BTG2','BTG3','CCL2','CCL20','CCL4','CCL5','CCN1','CCND1','CCNL1','CCRL2','CD44','CD69','CD80','CD83','CDKN1A','CEBPB','CEBPD','CFLAR','CLCF1','CSF1','CSF2','CXCL1','CXCL10','CXCL11','CXCL2','CXCL3','CXCL6','DENND5A','DNAJB4','DRAM1','DUSP1','DUSP2','DUSP4','DUSP5','EDN1','EFNA1','EGR1','EGR2','EGR3','EHD1','EIF1','ETS2','F2RL1','F3','FJX1','FOS','FOSB','FOSL1','FOSL2','FUT4','G0S2','GADD45A','GADD45B','GCH1','GEM','GFPT2','GPR183','HBEGF','HES1','ICAM1','ICOSLG','ID2','IER2','IER3','IER5','IFIH1','IFIT2','IFNGR2','IL12B','IL15RA','IL18','IL1A','IL1B','IL23A','IL6','IL6ST','IL7R','INHBA','IRF1','IRS2','JAG1','JUN','JUNB','KDM6B','KLF10','KLF2','KLF4','KLF6','KLF9','KYNU','LAMB3','LDLR','LIF','LITAF','MAFF','MAP2K3','MAP3K8','MARCKS','MCL1','MSC','MXD1','MYC','NAMPT','NFAT5','NFE2L2','NFIL3','NFKB1','NFKB2','NFKBIA','NFKBIE','NINJ1','NR4A1','NR4A2','NR4A3','OLR1','PANX1','PDE4B','PDLIM5','PER1','PFKFB3','PHLDA1','PHLDA2','PLAU','PLAUR','PLEK','PLK2','PLPP3','PMEPA1','PNRC1','PPP1R15A','PTGER4','PTGS2','PTPRE','PTX3','RCAN1','REL','RELA','RELB','RHOB','RIGI','RIPK2','RNF19B','SAT1','SDC4','SERPINB2','SERPINB8','SERPINE1','SGK1','SIK1','SLC16A6','SLC2A3','SLC2A6','SMAD3','SNN','SOCS3','SOD2','SPHK1','SPSB1','SQSTM1','STAT5A','TANK','TAP1','TGIF1','TIPARP','TLR2','TNC','TNF','TNFAIP2','TNFAIP3','TNFAIP6','TNFAIP8','TNFRSF9','TNFSF9','TNIP1','TNIP2','TRAF1','TRIB1','TRIP10','TSC22D1','TUBB2A','VEGFA','YRDC','ZBTB10','ZC3H12A','ZFP36') +gmt.obj <- list('HALLMARK_TNFA_SIGNALING_VIA_NFKB' = HALLMARK_TNFA_SIGNALING_VIA_NFKB, + 'FRIDMAN_SENESCENCE_UP' = gmt.obj_s[gmt.obj_s$term == 'FRIDMAN_SENESCENCE_UP',], + 'SASP_SCHLEICH' = gmt.obj_s[gmt.obj_s$term == 'SASP_SCHLEICH',]) + + +ll <- gmt.obj full_obj$CSC_clusters <- ifelse(full_obj$CSC_celltype == 'HSC_0', 'HSC-enriched_1', ifelse(full_obj$CSC_celltype == 'HSC_1', 'HSC-enriched_1', @@ -33,10 +43,11 @@ full_obj$CSC_clusters <- ifelse(full_obj$CSC_celltype == 'HSC_0', 'HSC-enriched_ - +#HSC1, HSC2, Myeloid, Neutrophil, Erythroid, Pre Mono full_obj$CSC_clusters <- factor(full_obj$CSC_clusters, levels=c('HSC-enriched_1', 'HSC-enriched_2', - 'PreB/Mono/DC_prog', 'Myeloid-biased_prog', - 'Neutrophil-biased_prog','Erythroid-biased_prog' + 'Myeloid-biased_prog', + 'Neutrophil-biased_prog','Erythroid-biased_prog', + 'PreB/Mono/DC_prog' )) @@ -44,87 +55,200 @@ table( colnames(mix3_obj_new) == colnames(full_obj)) full_obj$neworignames <- ifelse(full_obj$orig.ident == 'RNPneg', 'RNP NEG', ifelse(full_obj$orig.ident == 'GTGFPneg', 'GFP-', ifelse(full_obj$orig.ident == 'GTGFPpos', 'GFP+', - ifelse(full_obj$orig.ident == 'RNPhs', 'RNP HS',full_obj$orig.ident))))) + ifelse(full_obj$orig.ident == 'RNPhs', 'RNP HS',full_obj$orig.ident)))) Idents(full_obj) <- 'neworignames' full_obj <- subset(full_obj, idents = c("GFP+", "GFP-","RNP NEG", "RNP HS")) full_obj$neworignames <- factor(full_obj$neworignames, levels =c("RNP NEG","RNP HS", - "GFP-","GFP+")) + "GFP-","GFP+")) +# for senescence_list_paper.gmt Idents(full_obj) <- 'CSC_clusters' -for (l in 1:length(ll)) { - group <-ll[l] +for (l in names(ll)) { #1:length(ll)) { + group <-ll[l] nn <- gsub('\\)','',gsub('\\(','',gsub(' ','_',group))) nn <- gsub('-','_',nn) - glist <- gmt.obj[gmt.obj$term == group,'gene'] + glist <- gmt.obj[gmt.obj$term == group,'gene'] gmt_features <- list(glist) full_obj <- AddModuleScore( object = full_obj, - features = gmt_features, ## - name = nn, + features = gmt_features, ## + name = nn, )} - clcol <- c("RNP NEG"="black", "RNP HS"="gray36", "GFP-"="grey", "GFP+"="green3") Idents(full_obj) <- 'CSC_clusters' -p <- list() -sasp <- list() -for (n in levels(full_obj$CSC_clusters)){ - sub <- subset(full_obj, idents = n) - p[[n]] <- VlnPlot(object = sub, features = c("FRIDMAN_SENESCENCE_UP1"), - group.by ="neworignames", col = clcol, pt.size = 0,sort = F)+ - geom_boxplot(width=0.2,outlier.shape = NA)+ theme(legend.position="none", - axis.title.x = element_blank())+ - ggtitle(paste0(gsub('_', ' ',n))) - sasp[[n]] <- VlnPlot(object = sub, features = c("SASP_SCHLEICH1"), - group.by ="neworignames", col = clcol, pt.size = 0,sort = F)+ - geom_boxplot(width=0.2,outlier.shape = NA)+ theme(legend.position="none", - axis.title.x = element_blank())+ - ggtitle(paste0(gsub('_', ' ',n))) + +### check signif +check_fun <- function(x,df){ + df$comparison <- 'comp' + for (v in 1:dim(df)[[1]]) { + d1 <- as.character(x[x$group == as.character(df$Var1[v]), 'treatment']) # RNA_pop == group, get the type of treatment + d2 <- as.character(x[x$group == as.character(df$Var2[v]), 'treatment'])# RNA_pop == group + if (d1 == 'control' | d2 == 'control') { + ## keep value in the matrix + df$comparison <- replace(df$comparison,v,'tr_vs_control') + } else if (d1 == 'GFP+' && d2 == 'GFP-' | d1 == 'GFP-' && d2 == 'GFP+') { + df$comparison <- replace(df$comparison,v,'tr_vs_control') + } + + } + return(df) } -## get legend just from one +### for the full object +## plot as boxplot + + +Idents(full_obj) <- 'CSC_clusters' + + +names(ll) +namex <- list('FRIDMAN_SENESCENCE_UP' = 'FRIDMAN SENESCENCE UP', + 'SASP_SCHLEICH' = 'SASP SCHLEICH', + "HALLMARK_TNFA_SIGNALING_VIA_NFKB" = "HALLMARK TNFA SIGNALING VIA NFKB1") + +colrs <- c("RNP NEG"="black", "RNP HS"="gray20", "GFP-"="grey50", "GFP+"="green3") #"RNP H=grey 36 + +for (ix in names(namex)) { + p1 <- list() + p <- list() + melt_005df <- list() + melt_005pp <- list() + meltpp <- list() + pp <- list() + legend = '' + comp <- list() + for (clu in levels(full_obj$CSC_clusters)) { + n <- paste0(ix,'1') + subset <- subset(full_obj, idents=clu) + + antibio <- data.frame(subset@meta.data[[n]], subset@meta.data$neworignames) #data.frame(full_obj@meta.data[[n]], full_obj@meta.data$RNA_pop) + antibio$celltype <- clu + + i <- paste0(ix, '_', clu) + colnames(antibio) <- c('value', 'group', 'celltype') + + antibio$treatment <- ifelse(antibio$group == 'RNP NEG', 'control', as.character(antibio$group)) + + dim(antibio) + alpha_genes <- 0.05 + rownames(antibio) <- rownames(subset@meta.data) + + pp[[i]]<-list(pairwise.wilcox.test(antibio$value,antibio$group, p.adjust.method = "BH", paired = FALSE)) + meltpp[[i]] <- data.frame(melt(pp[[i]][[1]]$p.value)) + meltpp[[i]][is.na(meltpp[[i]]$value),3] <- 100 # substitute na woth zero, third column + if (min(meltpp[[i]]$value) <= alpha_genes){ + melt_005pp[[i]] <- meltpp[[i]]#filter(meltpp[[i]] , value <= alpha_genes) # data.frame(meltpp[[i]][meltpp[[i]]$value <= alpha_genes,]) + + melt_005df[[i]] <- melt_005pp[[i]][complete.cases(melt_005pp[[i]]),] + y_position <- list() + for (y in 1:dim(melt_005df[[i]])[1]) { + x <- as.numeric(paste(c('0.',y), collapse = "")) + y_position[y] <- sample(seq(median(antibio$value)+0.05, max(antibio$value)+0.05, by=0.01),size=1) #as.numeric(max(antibio$value)+0.1)#0.01) + } + melt_005df[[i]]$y.position <- y_position + + df <- melt_005df[[i]] + annotation <- unique(antibio[,c('treatment', 'group')]) + rownames(annotation) <- NULL + df_x <- check_fun(annotation,df) + melt_005df[[i]] <- filter(df_x , comparison != "comp") + melt_005df[[i]]$p.signif <- symnum(melt_005df[[i]]$value, cutpoints = c(0, 0.0001, 0.001, alpha_genes,0.07, 1), + symbols = c( "***", "**", "*",".","ns")) + if( nrow(melt_005df[[i]]) > 0) { + ## substitute name + + ## comparison df + ## create table + comparisons <- data.frame( + .y. = c(rep('len',dim(melt_005df[[i]])[1])), + group1 = melt_005df[[i]]$Var1, #_mod, + group2 = melt_005df[[i]]$Var2, #_mod, + p = melt_005df[[i]]$value, + p.adj = round(melt_005df[[i]]$value,3), + p.format = melt_005df[[i]]$value, + p.signif = melt_005df[[i]]$p.signif, + method = 'Wc', + y.position = unlist(melt_005df[[i]]$y.position), + group = unlist(melt_005df[[i]]$comparison), + type = i + #g1 = melt_005df[[i]]$Var1, + #g2 =melt_005df[[i]]$Var2 + ) %>% + as.tbl() + + cx <- gsub('/','_',clu) + comp[[cx]] <- comparisons[,c('group1', 'group2','p.adj','p.signif', 'type')] #comparisons[,-c(1)] + + p[[i]] <- ggviolin(antibio, x = "group", y = "value",fill='group', + palette =colrs, alpha=0.8,#add='boxplot',add.params = list(fill = "white"), + ylim=c(min(antibio$value),max(antibio$value)+0.1))+ + #stat_summary(fun.y = median, geom='segment', size = 3, colour = "black")+ + geom_boxplot(width=0.3, outlier.shape = NA, col='black', fill='white')+ + geom_signif( + data=comparisons, + aes(xmin=group1, xmax=group2, annotations=p.signif, y_position=y.position), + manual=T, tip_length = 0,textsize=5 #step_increase=comparisons$y.position, + )+ + theme( + legend.position = 'none', + plot.title = element_text(hjust = 0.5), + text = element_text(size = 12), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + axis.title.x = element_blank(), + plot.margin = margin(t = 20, r = 10, b = 10, l = 10, unit = 'pt'), + axis.line = element_line(colour = "black", + size = 0.7, linetype = "solid"), + axis.text.x = element_text(size=12, color='black', angle=45, hjust=1), # angle=0, hjust=1, + axis.text.y = element_text(size=12, color='black') + )+ylab('')+ggtitle(clu) + + } + } + else{ + p[[i]] <- ggviolin(antibio, x = "group", y = "value",fill='group', + palette =colrs,alpha=0.8, + ylim=c(min(antibio$value),max(antibio$value)+0.1))+ + #stat_summary(fun.y = median, size = 3, colour = "black")+ + geom_boxplot(width=0.3,outlier.shape = NA, col='black', fill ='white')+ + theme( + legend.position = 'none', + plot.title = element_text(hjust = 0.5), + text = element_text(size = 12),#8 + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + axis.title.x = element_blank(), + plot.margin = margin(t = 20, r = 10, b = 10, l = 10, unit = 'pt'), + axis.line = element_line(colour = "black", + size = 0.7, linetype = "solid"), + axis.text.x = element_text(size=12, color='black', angle=45, hjust=1), # angle=0, hjust=1, + axis.text.y = element_text(size=12, color='black') + )+ylab('')+ggtitle(clu) + } + } + + + d <- grid.arrange(grobs = list(p[[1]],p[[2]],p[[3]], leg), top =ix,left = textGrob("Module score", rot = 90, vjust = 1),nrow=1) + svg(paste('module_score_wilcoxon_sept2023', paste0(ix, "pairwise_wilcox.svg"), sep='/'), width =20, height = 4, pointsize=12) + plot(d) + dev.off() + + write.xlsx(comp,paste('module_score_wilcoxon_sept2023', paste0(ix, 'pairwise_wilcox.xlsx'), sep='/')) +} + + lg_legend<-function(a.gplot){ tmp <- ggplot_gtable(ggplot_build(a.gplot)) leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box") legend <- tmp$grobs[[leg]] return(legend)} -mylegend<- lg_legend(VlnPlot(object = sub, features = c("FRIDMAN_SENESCENCE_UP1"), - group.by ="neworignames", col = clcol, pt.size = 0,sort = F)+ - geom_boxplot(width=0.2,outlier.shape = NA)) - - -svg("senescence_violin.svg", width =20, height = 4, pointsize=12) -grid.arrange(p$`HSC-enriched_1`, p$`HSC-enriched_2`, p$`PreB/Mono/DC_prog`, - p$`Myeloid-biased_prog`, p$`Neutrophil-biased_prog`, p$`Erythroid-biased_prog`, mylegend, - nrow = 1,top = textGrob("FRIDMAN SENESCENCE UP",gp=gpar(fontsize=20,font=3))) -dev.off() - -svg("sasp_violin.svg", width =20, height = 4, pointsize=12) -grid.arrange(sasp$`HSC-enriched_1`, sasp$`HSC-enriched_2`, sasp$`PreB/Mono/DC_prog`, - sasp$`Myeloid-biased_prog`, sasp$`Neutrophil-biased_prog`, sasp$`Erythroid-biased_prog`, mylegend, - nrow = 1,top = textGrob("SASP SCHLEICH",gp=gpar(fontsize=20,font=3))) -dev.off() - - -## RIDGE plot -p <- RidgePlot(object = full_obj, features = c("FRIDMAN_SENESCENCE_UP1"),group.by = "neworignames", cols = clcol)+ theme(legend.position="none", - axis.title.x = element_blank(), - axis.title.y = element_blank()) -svg("senescence_ridge.svg", width =11, height = 6, pointsize=12) -p + ggtitle("FRIDMAN SENESCENCE UP") -dev.off() - -## final sasp -p <- RidgePlot(object = full_obj, features = c("SASP_SCHLEICH1"),group.by = "neworignames", cols = clcol)+ theme(legend.position="none", - axis.title.x = element_blank(), - axis.title.y = element_blank()) -svg("sasp_SCHLEICH_ridge.svg", width =11, height = 6, pointsize=12) -p + ggtitle("SASP SCHLEICH") -dev.off() +leg <- get_legend(p[[1]]) +