--- pdf_document: default author: '' output: html_document: null df_print: paged pdf_document: default word_document: default title: "geing GSEA reactome plot divided by category" --- ```{r echo=FALSE,echo=FALSE, warning=FALSE,message=FALSE} library(ggplot2) library(dplyr) library(stringi) library(tidyverse) library(RColorBrewer) library(circlize) library(ComplexHeatmap) library(reshape2) library(scales) ``` ```{r echo=FALSE,echo=FALSE, warning=FALSE,message=FALSE} folder = ('./data') reactome <- read.csv(paste(folder,'ON_Old-ON_Young-gsea-Pathway.txt',sep = '/'), sep='\t', header = TRUE) ### plot the following path from reactome dt <- reactome dt <- dt[dt$p.adjust <= 0.05,] dt <- dt[order(dt$p.adjust, decreasing = FALSE),] dt <- dt%>% mutate(Color = ifelse(p.adjust > mean(dt[,'p.adjust'] ), "red", "blue")) dt$logp.adjust <- -log10(dt$p.adjust) dt$logp.adjust <- round(dt$logp.adjust,1) dt <- dt[!dt$Description %in% c("Neuronal System","Neurotransmitter receptors and postsynaptic signal transmission"),] dt <- dt[!dt$Description %in% c("Transmission across Chemical Synapses","Muscle contraction","Axon guidance","HIV Life Cycle" ,"HIV Infection","Late Phase of HIV Life Cycle","Host Interactions of HIV factors"),] dt$name <- ifelse(dt$Description == "HDR through Homologous Recombination (HRR) or Single Strand Annealing (SSA)", 'HDR through HRR or SSA',dt$Description ) dt$Name <- dt$name #paste0(dt$ID, '-' ,dt$name) l <- c('Hematopoiesis','Metabolic process','Gene expression regulation','DNA repair','Cell cycle', 'Cell cycle','Chromatin conformation','DNA repair', 'Cell cycle','Gene expression regulation','Chromatin conformation','Cellular organization and transport', 'Protein Translation and localization', 'Cell cycle', 'Cell cycle','Metabolic process','Inflammation and Senescence','Gene expression regulation', 'Cell cycle','Inflammation and Senescence','Gene expression regulation','Hematopoiesis', 'Cell cycle','Cell cycle','Gene expression regulation','Inflammation and Senescence','Cell cycle','Inflammation and Senescence', 'DNA repair','Metabolic process','Cellular organization and transport','Cell cycle', 'Cell cycle','Protein Translation and localization', 'Gene expression regulation') dt$Category <- l dt$Category <- factor(dt$Category, levels =c("Cell cycle", "DNA repair", "Inflammation and Senescence", "Gene expression regulation", "Chromatin conformation", "Protein Translation and localization", "Cellular organization and transport", "Metabolic process", "Hematopoiesis")) m <- round(mean(seq(min(dt$logp.adjust),max(dt$logp.adjust),by = 0.1)),1) p <- ggplot(dt, aes(x = NES, y = reorder(Name, NES), fill=logp.adjust)) + geom_bar(stat = "identity",width=0.6)+ # aes(color= logp.adjust))+ #geom_point(aes(size=Count, color= p.adjust))+ #scale_colour_gradient(low="red", high="blue") + scale_fill_gradient(low="red",high="blue", breaks = c(min(dt$logp.adjust),m,max(dt$logp.adjust)), labels = c(min(dt$logp.adjust),m,max(dt$logp.adjust)), limits = c(min(dt$logp.adjust),max(dt$logp.adjust)), name = "-log10(FDR)") + cowplot::theme_cowplot() + labs(y= '',colour="-log10(FDR)")+ #facet_grid(cols = vars(plot.0$Dataset), scales="free_y") + geom_vline(xintercept = 0,linetype="dashed", color = "black",size=1)+ theme(axis.title = element_text(size=6,color = 'black', face='bold'), axis.title.x = element_text(size=12), axis.text.x = element_text(colour="black",size=12),#,face ='bold'), axis.text.y = element_text(colour="black",size=12),#,#face ='bold'), panel.grid.major.x = element_line(colour = "grey80", linetype = "dashed"), panel.grid.minor.y = element_blank(), panel.grid.major.y = element_line(colour = "grey80"), panel.grid.minor.x = element_blank(), legend.text=element_text(size=6, color ='black',face='bold'), legend.position="bottom", legend.margin=margin(),#legend.box="vertical", strip.background = element_rect(colour="black", fill="white",size=0.5 ), strip.text = element_text(color='black',face = 'bold', size = 8))+ ggtitle(paste0('')) x <- p+ ggforce::facet_col(~Category,scales = "free_y", space='free') #strip.position = "right") # svg(paste(folder, paste('ON_24',"gsea_Pathway_pos_HM_facet.svg", sep = "-"), sep = "/"), # width=8,height=12,pointsize = 12) # x #+ggtitle('REACTOME PATHWAY') # dev.off() ``` ```{r echo=FALSE,echo=FALSE, warning=FALSE,message=FALSE} ################################################################### #### check if genes in categories from core enrichment are signif ################################################################### ## signif Old vs Young ON (over night) df_ss <- read.table('./data/DESeq-ON_Old-ON_Young-siggene.txt') df_ss <- df_ss[df_ss$FDR < 0.05,] real_sig <- list() for (i in dt$Description){ lg <- dt[dt$Description == i,'core_enrichment'] lg <- str_split(lg, "/")[[1]] real_sig[[i]] <- df_ss[rownames(df_ss) %in% lg,] } ## core enrichment not only signif genes # df_ss <- read.table('/beegfs/scratch/ric.dimicco/ric.dimicco/DiMiccoR_1653_RNAseq/results/05-DGE-ON-no-Young2/tables/DESeq-ON_Old-ON_Young-allgene.txt') # real <- list() # for (i in hallmark.full_selected$ID){ # lg <- hallmark.full_selected[hallmark.full_selected$ID == i,'core_enrichment'] # lg <- str_split(lg, "/")[[1]] # real[[i]] <- df_ss[rownames(df_ss) %in% lg,] # } ## heatmap genes in core enrichment doHeatmap2 <- function(df, sinfo, num_row_clu = 2, high_genes = NULL) { hm_matrix = as.matrix(df) cols = list() for (cc in colnames(sinfo)) { cols[[cc]] <- brewer.pal(9, "Set1")[1:length(unique(sinfo[[cc]]))] names(cols[[cc]]) <- rev(unique(sinfo[[cc]])) } ha_col <- columnAnnotation(df = sinfo, col = cols,show_annotation_name = FALSE #annotation_name_side = "right" ) if (is.null(high_genes)) { row_names = TRUE ha_tre = NULL } else { row_names = FALSE gg <- intersect(rownames(hm_matrix), high_genes) gg_index <- which(rownames(hm_matrix) %in% gg) labels <- rownames(hm_matrix)[gg_index] ha_tre <- rowAnnotation(link = anno_mark(at = gg_index, labels = labels, labels_gp = gpar(fontsize = 7), link_width = unit(0, "mm"), which = "row",side = "left")#,padding = unit(0.5, "mm")) ) } hm = Heatmap(matrix = hm_matrix, #col = colorRampPalette(rev(brewer.pal(n = 9, name ="RdBu")))(100), #col = colorRamp2(c(-6, -4, -2, 0, 2, 4, 6), rev(brewer.pal(n = 7, name ="RdBu"))), #col = colorRamp2(c(-3, -2, -1, 0, 1, 2, 3), rev(brewer.pal(n = 7, name ="RdBu"))), col = colorRamp2(c(-2, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2), rev(brewer.pal(n = 9, name ="RdBu"))), cluster_columns = FALSE,#TRUE, cluster_rows = TRUE, rect_gp = gpar(col = "grey", lwd = 0), border = FALSE, show_row_names = TRUE,#FALSE, show_row_dend = FALSE, #show_row_names = FALSE, #row_names, row_names_gp = gpar(fontsize = 12), show_column_names = TRUE, column_title_gp = gpar(fontsize = 0), row_names_side = 'left', #row_split = as.numeric(levels(obj@meta.data[[clustering]])), #row_split = #cluster_row_slices = TRUE, #column_split = sinfo$condition, #column_split = rep(0:(nrow(hm_matrix) - 1), each = ntop), #column_gap = unit(4, "mm"), #row_title_gp = gpar(fontsize = 12), #row_split = num_row_clu, #row_title_rot = 0, #left_annotation = ha_tre, top_annotation = ha_col, width = 10, height = 10, heatmap_legend_param = list( title = "Z-score" )) return(hm) } doHeatmap3 <- function(df, sinfo, num_row_clu = 2, high_genes = NULL, top_annot =NULL) { hm_matrix = as.matrix(df) rows = list() for (cc in colnames(sinfo)) { rows[[cc]] <- brewer.pal(9, "Set1")[1:length(unique(sinfo[[cc]]))] names(rows[[cc]]) <- rev(unique(sinfo[[cc]])) } ha_row <- rowAnnotation(df = sinfo, col = rows,show_annotation_name = FALSE #annotation_name_side = "right" ) if (is.null(high_genes)) { col_names = TRUE ha_tre = NULL } else { row_names = FALSE gg <- intersect(colnames(hm_matrix), high_genes) gg_index <- which(colnames(hm_matrix) %in% gg) labels <- colnames(hm_matrix)[gg_index] ha_tre <- rowAnnotation(link = anno_mark(at = gg_index, labels = labels, labels_gp = gpar(fontsize = 7), link_width = unit(0, "mm"), which = "row",side = "left")#,padding = unit(0.5, "mm")) ) } hm = Heatmap(matrix = hm_matrix, #col = colorRampPalette(rev(brewer.pal(n = 9, name ="RdBu")))(100), #col = colorRamp2(c(-6, -4, -2, 0, 2, 4, 6), rev(brewer.pal(n = 7, name ="RdBu"))), #col = colorRamp2(c(-3, -2, -1, 0, 1, 2, 3), rev(brewer.pal(n = 7, name ="RdBu"))), col = colorRamp2(c(-2, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2), rev(brewer.pal(n = 9, name ="RdBu"))), cluster_columns = TRUE,#TRUE, cluster_rows = FALSE, rect_gp = gpar(col = "black", lwd = 0), border = FALSE, show_row_names = FALSE, show_row_dend = FALSE, show_column_dend = FALSE, column_names_gp = gpar(fontsize = 12), show_column_names = TRUE, column_title_gp = gpar(fontsize = 0), #column_dend_side = 'top', column_names_rot = 75, #row_names_side = 'left', #row_split = as.numeric(levels(obj@meta.data[[clustering]])), #row_split = #cluster_row_slices = TRUE, #column_split = sinfo$condition, #column_split = rep(0:(nrow(hm_matrix) - 1), each = ntop), #column_gap = unit(4, "mm"), #row_title_gp = gpar(fontsize = 12), #row_split = num_row_clu, #row_title_rot = 0, #left_annotation = ha_tre, top_annotation = top_annot, left_annotation = ha_row, width = 10, height = 10, heatmap_legend_param = list( title = "Z-score", #at = c(round(min(hm_matrix)),0,round(max(hm_matrix))), legend_direction = "horizontal", legend_width = unit(4, "cm")), ) return(hm) } folder <- "/beegfs/scratch/ric.dimicco/ric.dimicco/DiMiccoR_1653_RNAseq/bulk_rnaseq/data" dimicco <- readRDS("data/ON_Old-ON_Young_20220331_175727_DGE.rds") plot.dir <- folder sinfo <- dimicco$sample_info sinfo$condition <- factor(sinfo$condition, levels = c("ON_Young", "ON_Old" )) ## annotation list dt <- arrange(dt,Category) cat_full <- data.frame(separate_rows(dt , core_enrichment,sep='/', convert = TRUE)) cat_full <- unique(cat_full[, c('Description','core_enrichment')]) table(unique(cat_full$core_enrichment)) length(unique(cat_full$core_enrichment)) annotcat <- dcast(cat_full, core_enrichment ~ Description) ## condition rownames(annotcat) <- annotcat$core_enrichment annotcat <- annotcat[,-c(1)] c <- colnames(annotcat) for (val in c) { annotcat[[val]] <- ifelse(annotcat[[val]] != 'NA',val,NA) } ##### all list together gg <- c() for (n in names(real_sig)) { gg <- append(gg,rownames(real_sig[[n]])) } mat_gg <- as.data.frame(dimicco$deseq$rlog)[unique(gg), ] annocat_signif <- annotcat[rownames(mat_gg),] mat_gg_scaled <- t(scale(t(mat_gg))) #dimicco_hm <- doHeatmap2(df = t(mat_gg_scaled), sinfo = sinfo, high_genes =unique(gg)) #rorder <- row_order(dimicco_hm) #mat_gg_scaled_names <- mat_gg_scaled[rorder,] annocat_signif <- annocat_signif[,dt$Description] annocat_signif <- annocat_signif[rownames(mat_gg_scaled ),] # order automatically table(rownames(annocat_signif ) == rownames(mat_gg_scaled)) n <- rep('black',dim(annocat_signif)[2]) names(n) <- colnames(annocat_signif) my_cols <- hue_pal()(length(levels(dt$Category))) names(my_cols) <- levels(dt$Category) colors <- list() for (i in dt$Description) { cn <- dt[dt$Description == i, 'Category'] colors[i] <- my_cols[[cn]] } # ht3 = Heatmap(as.matrix(annocat_signif), name = " ",show_column_names = TRUE, # show_row_names =FALSE, # border = TRUE,row_names_gp = gpar(fontsize = 4), # column_names_gp = gpar(fontsize = 6, rot = 90), # rect_gp = gpar(col = "black", lwd = 0.5), # col = colors ,na_col = "white", # heatmap_legend_param = list(labels = dt$Category ))#c(rep('', dim(annocat_signif)[2]))) ) ## hm of gene in core enrichment and also signif all categories # svg(filename=paste(plot.dir, paste0("HM_full_categories", "_ON_Old_vs_Young.svg"), sep = "/"), # width=12, # height=25, # pointsize=12) # draw(dimicco_hm+ht3, show_annotation_legend = FALSE, padding = unit(c(30, 2, 2, 60), "mm")) # dev.off() ####################################################### ## hm of gene in core enrichment and also signif all categories reduced version cat_full <- data.frame(separate_rows(dt , core_enrichment,sep='/', convert = TRUE)) cat_full <- unique(cat_full[, c('Category','core_enrichment')]) table(unique(cat_full$core_enrichment)) length(unique(cat_full$core_enrichment)) annotcat <- dcast(cat_full, core_enrichment ~ Category) ## condition rownames(annotcat) <- annotcat$core_enrichment annotcat <- annotcat[,-c(1)] c <- colnames(annotcat) for (val in c) { annotcat[[val]] <- ifelse(annotcat[[val]] != 'NA',val,NA) } annocat_signif <- annotcat[rownames(mat_gg),] #rorder <- row_order(dimicco_hm) #mat_gg_scaled_names <- mat_gg_scaled[rorder,] #annocat_signif <- annocat_signif[,dt$Category] #annocat_signif <- annocat_signif[rownames(mat_gg_scaled ),] # order automatically table(rownames(annocat_signif ) == rownames(mat_gg_scaled)) my_cols <- c(hue_pal()(length(levels(dt$Category)))) names(my_cols) <- levels(dt$Category) # ht3 = Heatmap(as.matrix(annocat_signif), name = " ",show_column_names = TRUE, # show_row_names =FALSE, # border = TRUE,row_names_gp = gpar(fontsize = 12), # column_names_gp = gpar(fontsize = 12, rot = 90), # rect_gp = gpar(col = "black", lwd = 0.5), # col = my_cols ,na_col = "white", # heatmap_legend_param = list(labels = levels(dt$Category) ))#c(rep('', dim(annocat_signif)[2]))) ) # svg(filename=paste(plot.dir, paste0("HM_reduced_categories", "_ON_Old_vs_Young.svg"), sep = "/"), # width=12, # height=25, # pointsize=12) # draw(dimicco_hm+ht3, show_annotation_legend = FALSE, padding = unit(c(30, 2, 2, 60), "mm")) # dev.off() ## rotate heatmap for (i in dt$Category) { colors[i] <- my_cols[[cn]] } ht3 = HeatmapAnnotation(df= annocat_signif, gp = gpar(col = 'black',lwd=0.5),#which = 'col', #row_names_gp = gpar(fontsize = 12, rot = 90), #rect_gp = gpar(col = "black", lwd = 0.5), border = TRUE, na_col = "white", col = list('colors' = c(colors)),#list('colors' = c(my_cols)), show_legend = FALSE) dimicco_hm <- doHeatmap3(df = t(mat_gg_scaled), sinfo = sinfo, high_genes =unique(gg), top_annot=ht3) #draw(dimicco_hm) # svg(filename=paste(plot.dir, paste0("HM_reduced_categories_reverse", "_ON_Old_vs_Young.svg"), sep = "/"), # width=26,#12, # height=6, #25, # pointsize=12) # #bottom, left, top, right paddings. # draw(dimicco_hm, show_annotation_legend = FALSE, padding = unit(c(30, 2, 2, 30), "mm")) # dev.off() svg(filename=paste(plot.dir, paste0("HM_reduced_categories_reverse", "_ON_Old_vs_Young_v1.svg"), sep = "/"), width=26,#12, height=6, #25, pointsize=12) #bottom, left, top, right paddings. draw(dimicco_hm, show_annotation_legend = FALSE, padding = unit(c(30, 2, 2, 30), "mm"),heatmap_legend_side = "top") dev.off() ``` ##### HM of genes in the core enrichment and also signif single HM ```{r echo=FALSE,echo=FALSE, warning=FALSE,message=FALSE} ####################################################### # # for (y in unique(dt$Category)) { ## for each macro category # gg <- c() # for (n in names(real_sig)) { # if (n %in% dt[dt$Category == y,'Description']){ # print(n) # #gg <- rownames(real_sig[[n]]) # gg <- append(gg,rownames(real_sig[[n]])) # } # } # gx <- unique(gg) # mat_gg <- as.data.frame(dimicco$deseq$rlog)[gx, ] # mat_gg_scaled <- t(scale(t(mat_gg))) # dimicco_hm <- doHeatmap2(df = mat_gg_scaled, sinfo = sinfo) # # str(sinfo) # n <- gsub(' ','_',gsub('/','_',n)) # # svg(filename=paste(plot.dir, paste0("HM_", y, "_ON_Old_vs_Young.svg"), sep = "/"), # # width=6, # # height=6, # # pointsize=12) # # draw(dimicco_hm) # # dev.off() # # # if (length(gx) > 39) { # png(filename=paste(plot.dir, paste0("HM_", y, "_ON_Old_vs_Young.png"), sep = "/"), # units = "in", width = 8, height = 25, res = 300) # draw(dimicco_hm, merge_legend = TRUE,heatmap_legend_side = 'top',column_title =y) # dev.off() # } else if (length(gx) > 22) { # png(filename=paste(plot.dir, paste0("HM_", y, "_ON_Old_vs_Young.png"), sep = "/"), # units = "in", width = 5, height = 8, res = 300) # draw(dimicco_hm, merge_legend = TRUE,heatmap_legend_side = 'top',column_title =y) # dev.off() # } else if (length(gx) > 10) { # png(filename=paste(plot.dir, paste0("HM_", y, "_ON_Old_vs_Young.png"), sep = "/"), # units = "in", width = 5, height = 6, res = 300) # draw(dimicco_hm, merge_legend = TRUE,heatmap_legend_side = 'top',column_title =y) # dev.off() # } else { # png(filename=paste(plot.dir, paste0("HM_", y, "_ON_Old_vs_Young.png"), sep = "/"), # units = "in", width = 4, height = 4, res = 300) # draw(dimicco_hm, merge_legend = TRUE,heatmap_legend_side = 'top',column_title =y) # dev.off() # } # } ``` volcano plot ```{r echo=FALSE,echo=FALSE, warning=FALSE,message=FALSE} ds <- c('ON Old vs ON Young' = '/beegfs/scratch/ric.dimicco/ric.dimicco/DiMiccoR_1653_RNAseq/results/05-DGE-ON/tables/DESeq-ON_Old-ON_Young-allgene.txt') ``` ```{r echo=FALSE,echo=FALSE, warning=FALSE,message=FALSE} library(EnhancedVolcano) library(dplyr) library(ggrepel) plotVolcano <- function(dge_res, adj.pval.thr, logfc.thr,titlex) { data <- data.frame(gene = row.names(dge_res), pval = -log10(dge_res$FDR), lfc = dge_res$logFC) data <- na.omit(data) data <- mutate(data, color = case_when(data$lfc > logfc.thr & data$pval > -log10(adj.pval.thr) ~ "Overexpressed", data$lfc < -logfc.thr & data$pval > -log10(adj.pval.thr) ~ "Underexpressed", abs(data$lfc) < logfc.thr | data$pval < -log10(adj.pval.thr) ~ "NonSignificant")) data <- data[order(data$pval, decreasing = TRUE),] highl <- head(subset(data, color == "Overexpressed"), 10) lowl <- head(subset(data, color == "Underexpressed"), 10) tt <- rbind( highl,lowl) vol <- ggplot(data, aes(x = lfc, y = pval, colour = color)) + theme(legend.position = "right") + xlab("log2 Fold Change") + ylab(expression(-log[10]("adjusted p-value"))) + geom_hline(yintercept = -log10(adj.pval.thr), colour = "darkgray") + geom_point(size = 1, alpha = 0.8, na.rm = T) + scale_color_manual(name = "Expression", values = c(Overexpressed = "#CD4F39", Underexpressed = 'blue',#"#00488b", NonSignificant = "darkgray")) + geom_text_repel(data=tt,aes(x = lfc, y = pval,label=gene),show.legend= FALSE, box.padding = 0.3)+ theme_classic() + theme(axis.text.x = element_text(colour="black",size=12), axis.text.y = element_text(colour="black",size=12), legend.position = "bottom", #legend.justification = 0.05, panel.grid.major.x = element_line(colour = "grey80", linetype = "dashed"), panel.grid.minor.y = element_blank(), panel.grid.major.y = element_line(colour = "grey80", linetype = "dashed"), panel.grid.minor.x = element_blank()) + ggtitle(titlex)+theme(plot.title = element_text(size = 12, face = "bold")) return(vol) } adj.pval.thr = 0.05 logfc.thr = 0 titlex = 'OLD vs Young' x <- list() xx <- list() folder = '/beegfs/scratch/ric.dimicco/ric.dimicco/DiMiccoR_1653_RNAseq/bulk_rnaseq' for (name in names(ds)) { dt <- read.table(ds[[name]]) x[[name]] <- plotVolcano(dt, 0.05, 0,name) #myvolcano(dt,0.05,0,name) xx[[name]] <- EnhancedVolcano(dt, lab = rownames(dt), x = 'logFC', y = 'PValue')+ ggtitle(name) # svg(filename= paste(folder, paste0(name, '.svg'),sep = "/"), width = 6, height = 6, pointsize=12) plot(x[[name]]) dev.off() #svg(filename= paste(folder, paste0(name, '.svg'),sep = "/"), # width = 6, height = 5, pointsize=12) #plot(xx[[name]]) #dev.off() } x <- list() xx <- list() folder = '/beegfs/scratch/ric.dimicco/ric.dimicco/DiMiccoR_1653_RNAseq/results' for (name in names(ds)) { dt <- read.table(ds[[name]]) x[[name]] <- plotVolcano(dt, 0.05, 0,name) #myvolcano(dt,0.05,0,name) xx[[name]] <- EnhancedVolcano(dt, lab = rownames(dt), x = 'logFC', y = 'PValue')+ ggtitle(name) # svg(filename= paste(folder, paste0(name, '_noY2.svg'),sep = "/"), width = 6, height = 6, pointsize=12) plot(x[[name]]) dev.off() } ```