Commit e777651b authored by Teresa Tavella's avatar Teresa Tavella
Browse files

add data and script

parent 45a5f4a0
---
title: "geing GSEA reactome plot divided by category"
author: ""
output:
html_document:
df_print: paged
word_document: default
pdf_document: default
---
```{r echo=FALSE,echo=FALSE, warning=FALSE,message=FALSE}
library(ggplot2)
library(dplyr)
library(stringi)
library(tidyverse)
library(RColorBrewer)
library(circlize)
library(ComplexHeatmap)
library(reshape2)
library(scales)
```
```{r echo=FALSE,echo=FALSE, warning=FALSE,message=FALSE}
folder = ('./data')
reactome <- read.csv(paste(folder,'ON_Old-ON_Young-gsea-Pathway.txt',sep = '/'),
sep='\t', header = TRUE)
### plot the following path from reactome
dt <- reactome
dt <- dt[dt$p.adjust <= 0.05,]
dt <- dt[order(dt$p.adjust, decreasing = FALSE),]
dt <- dt%>% mutate(Color = ifelse(p.adjust > mean(dt[,'p.adjust'] ), "red", "blue"))
dt$logp.adjust <- -log10(dt$p.adjust)
dt$logp.adjust <- round(dt$logp.adjust,1)
dt <- dt[!dt$Description %in% c("Neuronal System","Neurotransmitter receptors and postsynaptic signal transmission"),]
dt <- dt[!dt$Description %in% c("Transmission across Chemical Synapses","Muscle contraction","Axon guidance","HIV Life Cycle" ,"HIV Infection","Late Phase of HIV Life Cycle","Host Interactions of HIV factors"),]
dt$name <- ifelse(dt$Description == "HDR through Homologous Recombination (HRR) or Single Strand Annealing (SSA)", 'HDR through HRR or SSA',dt$Description )
dt$Name <- dt$name #paste0(dt$ID, '-' ,dt$name)
l <- c('Hematopoiesis','Metabolic process','Gene expression regulation','DNA repair','Cell cycle',
'Cell cycle','Chromatin conformation','DNA repair',
'Cell cycle','Gene expression regulation','Chromatin conformation','Cellular organization and transport',
'Protein Translation and localization', 'Cell cycle', 'Cell cycle','Metabolic process','Inflammation and Senescence','Gene expression regulation',
'Cell cycle','Inflammation and Senescence','Gene expression regulation','Hematopoiesis',
'Cell cycle','Cell cycle','Gene expression regulation','Inflammation and Senescence','Cell cycle','Inflammation and Senescence',
'DNA repair','Metabolic process','Cellular organization and transport','Cell cycle',
'Cell cycle','Protein Translation and localization',
'Gene expression regulation')
dt$Category <- l
dt$Category <- factor(dt$Category, levels =c("Cell cycle",
"DNA repair",
"Inflammation and Senescence",
"Gene expression regulation",
"Chromatin conformation",
"Protein Translation and localization",
"Cellular organization and transport",
"Metabolic process",
"Hematopoiesis"))
m <- round(mean(seq(min(dt$logp.adjust),max(dt$logp.adjust),by = 0.1)),1)
p <- ggplot(dt, aes(x = NES, y = reorder(Name, NES), fill=logp.adjust)) +
geom_bar(stat = "identity",width=0.6)+ # aes(color= logp.adjust))+
#geom_point(aes(size=Count, color= p.adjust))+
#scale_colour_gradient(low="red", high="blue") +
scale_fill_gradient(low="red",high="blue",
breaks = c(min(dt$logp.adjust),m,max(dt$logp.adjust)),
labels = c(min(dt$logp.adjust),m,max(dt$logp.adjust)),
limits = c(min(dt$logp.adjust),max(dt$logp.adjust)),
name = "-log10(FDR)") +
cowplot::theme_cowplot() + labs(y= '',colour="-log10(FDR)")+
#facet_grid(cols = vars(plot.0$Dataset), scales="free_y") +
geom_vline(xintercept = 0,linetype="dashed", color = "black",size=1)+
theme(axis.title = element_text(size=6,color = 'black', face='bold'),
axis.title.x = element_text(size=12),
axis.text.x = element_text(colour="black",size=12),#,face ='bold'),
axis.text.y = element_text(colour="black",size=12),#,#face ='bold'),
panel.grid.major.x = element_line(colour = "grey80", linetype = "dashed"),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_line(colour = "grey80"),
panel.grid.minor.x = element_blank(),
legend.text=element_text(size=6, color ='black',face='bold'),
legend.position="bottom", legend.margin=margin(),#legend.box="vertical",
strip.background = element_rect(colour="black",
fill="white",size=0.5
),
strip.text = element_text(color='black',face = 'bold', size = 8))+ ggtitle(paste0(''))
x <- p+ ggforce::facet_col(~Category,scales = "free_y",
space='free')
#strip.position = "right")
# svg(paste(folder, paste('ON_24',"gsea_Pathway_pos_HM_facet.svg", sep = "-"), sep = "/"),
# width=8,height=12,pointsize = 12)
# x #+ggtitle('REACTOME PATHWAY')
# dev.off()
```
```{r echo=FALSE,echo=FALSE, warning=FALSE,message=FALSE}
###################################################################
#### check if genes in categories from core enrichment are signif
###################################################################
## signif Old vs Young ON (over night)
df_ss <- read.table('./data/DESeq-ON_Old-ON_Young-siggene.txt')
df_ss <- df_ss[df_ss$FDR < 0.05,]
real_sig <- list()
for (i in dt$Description){
lg <- dt[dt$Description == i,'core_enrichment']
lg <- str_split(lg, "/")[[1]]
real_sig[[i]] <- df_ss[rownames(df_ss) %in% lg,]
}
## core enrichment not only signif genes
# df_ss <- read.table('/beegfs/scratch/ric.dimicco/ric.dimicco/DiMiccoR_1653_RNAseq/results/05-DGE-ON-no-Young2/tables/DESeq-ON_Old-ON_Young-allgene.txt')
# real <- list()
# for (i in hallmark.full_selected$ID){
# lg <- hallmark.full_selected[hallmark.full_selected$ID == i,'core_enrichment']
# lg <- str_split(lg, "/")[[1]]
# real[[i]] <- df_ss[rownames(df_ss) %in% lg,]
# }
## heatmap genes in core enrichment
doHeatmap2 <- function(df, sinfo, num_row_clu = 2, high_genes = NULL) {
hm_matrix = as.matrix(df)
cols = list()
for (cc in colnames(sinfo)) {
cols[[cc]] <- brewer.pal(9, "Set1")[1:length(unique(sinfo[[cc]]))]
names(cols[[cc]]) <- rev(unique(sinfo[[cc]]))
}
ha_col <- columnAnnotation(df = sinfo,
col = cols,show_annotation_name = FALSE
#annotation_name_side = "right"
)
if (is.null(high_genes)) {
row_names = TRUE
ha_tre = NULL
} else {
row_names = FALSE
gg <- intersect(rownames(hm_matrix), high_genes)
gg_index <- which(rownames(hm_matrix) %in% gg)
labels <- rownames(hm_matrix)[gg_index]
ha_tre <- rowAnnotation(link = anno_mark(at = gg_index, labels = labels, labels_gp = gpar(fontsize = 7),
link_width = unit(0, "mm"),
which = "row",side = "left")#,padding = unit(0.5, "mm"))
)
}
hm = Heatmap(matrix = hm_matrix,
#col = colorRampPalette(rev(brewer.pal(n = 9, name ="RdBu")))(100),
#col = colorRamp2(c(-6, -4, -2, 0, 2, 4, 6), rev(brewer.pal(n = 7, name ="RdBu"))),
#col = colorRamp2(c(-3, -2, -1, 0, 1, 2, 3), rev(brewer.pal(n = 7, name ="RdBu"))),
col = colorRamp2(c(-2, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2), rev(brewer.pal(n = 9, name ="RdBu"))),
cluster_columns = FALSE,#TRUE,
cluster_rows = TRUE,
rect_gp = gpar(col = "grey", lwd = 0),
border = FALSE,
show_row_names = TRUE,#FALSE,
show_row_dend = FALSE,
#show_row_names = FALSE, #row_names,
row_names_gp = gpar(fontsize = 12),
show_column_names = TRUE,
column_title_gp = gpar(fontsize = 0),
row_names_side = 'left',
#row_split = as.numeric(levels(obj@meta.data[[clustering]])),
#row_split =
#cluster_row_slices = TRUE,
#column_split = sinfo$condition,
#column_split = rep(0:(nrow(hm_matrix) - 1), each = ntop),
#column_gap = unit(4, "mm"),
#row_title_gp = gpar(fontsize = 12),
#row_split = num_row_clu,
#row_title_rot = 0,
#left_annotation = ha_tre,
top_annotation = ha_col,
width = 10, height = 10,
heatmap_legend_param = list(
title = "Z-score"
))
return(hm)
}
doHeatmap3 <- function(df, sinfo, num_row_clu = 2, high_genes = NULL, top_annot =NULL) {
hm_matrix = as.matrix(df)
rows = list()
for (cc in colnames(sinfo)) {
rows[[cc]] <- brewer.pal(9, "Set1")[1:length(unique(sinfo[[cc]]))]
names(rows[[cc]]) <- rev(unique(sinfo[[cc]]))
}
ha_row <- rowAnnotation(df = sinfo,
col = rows,show_annotation_name = FALSE
#annotation_name_side = "right"
)
if (is.null(high_genes)) {
col_names = TRUE
ha_tre = NULL
} else {
row_names = FALSE
gg <- intersect(colnames(hm_matrix), high_genes)
gg_index <- which(colnames(hm_matrix) %in% gg)
labels <- colnames(hm_matrix)[gg_index]
ha_tre <- rowAnnotation(link = anno_mark(at = gg_index, labels = labels, labels_gp = gpar(fontsize = 7),
link_width = unit(0, "mm"),
which = "row",side = "left")#,padding = unit(0.5, "mm"))
)
}
hm = Heatmap(matrix = hm_matrix,
#col = colorRampPalette(rev(brewer.pal(n = 9, name ="RdBu")))(100),
#col = colorRamp2(c(-6, -4, -2, 0, 2, 4, 6), rev(brewer.pal(n = 7, name ="RdBu"))),
#col = colorRamp2(c(-3, -2, -1, 0, 1, 2, 3), rev(brewer.pal(n = 7, name ="RdBu"))),
col = colorRamp2(c(-2, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2), rev(brewer.pal(n = 9, name ="RdBu"))),
cluster_columns = TRUE,#TRUE,
cluster_rows = FALSE,
rect_gp = gpar(col = "black", lwd = 0),
border = FALSE,
show_row_names = FALSE,
show_row_dend = FALSE,
show_column_dend = FALSE,
column_names_gp = gpar(fontsize = 12),
show_column_names = TRUE,
column_title_gp = gpar(fontsize = 0),
#column_dend_side = 'top',
column_names_rot = 75,
#row_names_side = 'left',
#row_split = as.numeric(levels(obj@meta.data[[clustering]])),
#row_split =
#cluster_row_slices = TRUE,
#column_split = sinfo$condition,
#column_split = rep(0:(nrow(hm_matrix) - 1), each = ntop),
#column_gap = unit(4, "mm"),
#row_title_gp = gpar(fontsize = 12),
#row_split = num_row_clu,
#row_title_rot = 0,
#left_annotation = ha_tre,
top_annotation = top_annot,
left_annotation = ha_row,
width = 10, height = 10,
heatmap_legend_param = list(
title = "Z-score",
at = c(round(min(hm_matrix)),0,round(max(hm_matrix))),
legend_direction = "horizontal",
legend_width = unit(4, "cm")),
)
return(hm)
}
folder <- "/beegfs/scratch/ric.dimicco/ric.dimicco/DiMiccoR_1653_RNAseq/bulk_rnaseq/data"
dimicco <- readRDS("data/ON_Old-ON_Young_20220331_175727_DGE.rds")
plot.dir <- folder
sinfo <- dimicco$sample_info
sinfo$condition <- factor(sinfo$condition, levels = c("ON_Young", "ON_Old" ))
## annotation list
dt <- arrange(dt,Category)
cat_full <- data.frame(separate_rows(dt , core_enrichment,sep='/', convert = TRUE))
cat_full <- unique(cat_full[, c('Description','core_enrichment')])
table(unique(cat_full$core_enrichment))
length(unique(cat_full$core_enrichment))
annotcat <- dcast(cat_full, core_enrichment ~ Description) ## condition
rownames(annotcat) <- annotcat$core_enrichment
annotcat <- annotcat[,-c(1)]
c <- colnames(annotcat)
for (val in c) {
annotcat[[val]] <- ifelse(annotcat[[val]] != 'NA',val,NA)
}
##### all list together
gg <- c()
for (n in names(real_sig)) {
gg <- append(gg,rownames(real_sig[[n]]))
}
mat_gg <- as.data.frame(dimicco$deseq$rlog)[unique(gg), ]
annocat_signif <- annotcat[rownames(mat_gg),]
mat_gg_scaled <- t(scale(t(mat_gg)))
dimicco_hm <- doHeatmap2(df = t(mat_gg_scaled), sinfo = sinfo, high_genes =unique(gg))
rorder <- row_order(dimicco_hm)
mat_gg_scaled_names <- mat_gg_scaled[rorder,]
annocat_signif <- annocat_signif[,dt$Description]
annocat_signif <- annocat_signif[rownames(mat_gg_scaled ),] # order automatically
table(rownames(annocat_signif ) == rownames(mat_gg_scaled))
n <- rep('black',dim(annocat_signif)[2])
names(n) <- colnames(annocat_signif)
my_cols <- hue_pal()(length(levels(dt$Category)))
names(my_cols) <- levels(dt$Category)
colors <- list()
for (i in dt$Description) {
cn <- dt[dt$Description == i, 'Category']
colors[i] <- my_cols[[cn]]
}
ht3 = Heatmap(as.matrix(annocat_signif), name = " ",show_column_names = TRUE,
show_row_names =FALSE,
border = TRUE,row_names_gp = gpar(fontsize = 4),
column_names_gp = gpar(fontsize = 6, rot = 90),
rect_gp = gpar(col = "black", lwd = 0.5),
col = colors ,na_col = "white",
heatmap_legend_param = list(labels = dt$Category ))#c(rep('', dim(annocat_signif)[2]))) )
## hm of gene in core enrichment and also signif all categories
# svg(filename=paste(plot.dir, paste0("HM_full_categories", "_ON_Old_vs_Young.svg"), sep = "/"),
# width=12,
# height=25,
# pointsize=12)
# draw(dimicco_hm+ht3, show_annotation_legend = FALSE, padding = unit(c(30, 2, 2, 60), "mm"))
# dev.off()
#######################################################
## hm of gene in core enrichment and also signif all categories reduced version
cat_full <- data.frame(separate_rows(dt , core_enrichment,sep='/', convert = TRUE))
cat_full <- unique(cat_full[, c('Category','core_enrichment')])
table(unique(cat_full$core_enrichment))
length(unique(cat_full$core_enrichment))
annotcat <- dcast(cat_full, core_enrichment ~ Category) ## condition
rownames(annotcat) <- annotcat$core_enrichment
annotcat <- annotcat[,-c(1)]
c <- colnames(annotcat)
for (val in c) {
annotcat[[val]] <- ifelse(annotcat[[val]] != 'NA',val,NA)
}
annocat_signif <- annotcat[rownames(mat_gg),]
#rorder <- row_order(dimicco_hm)
#mat_gg_scaled_names <- mat_gg_scaled[rorder,]
#annocat_signif <- annocat_signif[,dt$Category]
#annocat_signif <- annocat_signif[rownames(mat_gg_scaled ),] # order automatically
table(rownames(annocat_signif ) == rownames(mat_gg_scaled))
my_cols <- c(hue_pal()(length(levels(dt$Category))))
names(my_cols) <- levels(dt$Category)
ht3 = Heatmap(as.matrix(annocat_signif), name = " ",show_column_names = TRUE,
show_row_names =FALSE,
border = TRUE,row_names_gp = gpar(fontsize = 12),
column_names_gp = gpar(fontsize = 12, rot = 90),
rect_gp = gpar(col = "black", lwd = 0.5),
col = my_cols ,na_col = "white",
heatmap_legend_param = list(labels = levels(dt$Category) ))#c(rep('', dim(annocat_signif)[2]))) )
# svg(filename=paste(plot.dir, paste0("HM_reduced_categories", "_ON_Old_vs_Young.svg"), sep = "/"),
# width=12,
# height=25,
# pointsize=12)
# draw(dimicco_hm+ht3, show_annotation_legend = FALSE, padding = unit(c(30, 2, 2, 60), "mm"))
# dev.off()
## rotate heatmap
for (i in dt$Category) {
colors[i] <- my_cols[[cn]]
}
ht3 = HeatmapAnnotation(df= annocat_signif,
gp = gpar(col = 'black',lwd=0.5),#which = 'col',
#row_names_gp = gpar(fontsize = 12, rot = 90),
#rect_gp = gpar(col = "black", lwd = 0.5),
border = TRUE, na_col = "white",
col = list('colors' = c(colors)),#list('colors' = c(my_cols)),
show_legend = FALSE)
dimicco_hm <- doHeatmap3(df = t(mat_gg_scaled), sinfo = sinfo, high_genes =unique(gg),
top_annot=ht3)
#draw(dimicco_hm)
# svg(filename=paste(plot.dir, paste0("HM_reduced_categories_reverse", "_ON_Old_vs_Young.svg"), sep = "/"),
# width=26,#12,
# height=6, #25,
# pointsize=12)
# #bottom, left, top, right paddings.
# draw(dimicco_hm, show_annotation_legend = FALSE, padding = unit(c(30, 2, 2, 30), "mm"))
# dev.off()
svg(filename=paste(plot.dir, paste0("HM_reduced_categories_reverse", "_ON_Old_vs_Young_v1.svg"), sep = "/"),
width=26,#12,
height=6, #25,
pointsize=12)
#bottom, left, top, right paddings.
draw(dimicco_hm, show_annotation_legend = FALSE, padding = unit(c(30, 2, 2, 30), "mm"),heatmap_legend_side = "top")
dev.off()
```
##### HM of genes in the core enrichment and also signif single HM
```{r echo=FALSE,echo=FALSE, warning=FALSE,message=FALSE}
#######################################################
for (y in unique(dt$Category)) { ## for each macro category
gg <- c()
for (n in names(real_sig)) {
if (n %in% dt[dt$Category == y,'Description']){
print(n)
#gg <- rownames(real_sig[[n]])
gg <- append(gg,rownames(real_sig[[n]]))
}
}
gx <- unique(gg)
mat_gg <- as.data.frame(dimicco$deseq$rlog)[gx, ]
mat_gg_scaled <- t(scale(t(mat_gg)))
dimicco_hm <- doHeatmap2(df = mat_gg_scaled, sinfo = sinfo)
str(sinfo)
n <- gsub(' ','_',gsub('/','_',n))
# svg(filename=paste(plot.dir, paste0("HM_", y, "_ON_Old_vs_Young.svg"), sep = "/"),
# width=6,
# height=6,
# pointsize=12)
# draw(dimicco_hm)
# dev.off()
#
if (length(gx) > 39) {
png(filename=paste(plot.dir, paste0("HM_", y, "_ON_Old_vs_Young.png"), sep = "/"),
units = "in", width = 8, height = 25, res = 300)
draw(dimicco_hm, merge_legend = TRUE,heatmap_legend_side = 'top',column_title =y)
dev.off()
} else if (length(gx) > 22) {
png(filename=paste(plot.dir, paste0("HM_", y, "_ON_Old_vs_Young.png"), sep = "/"),
units = "in", width = 5, height = 8, res = 300)
draw(dimicco_hm, merge_legend = TRUE,heatmap_legend_side = 'top',column_title =y)
dev.off()
} else if (length(gx) > 10) {
png(filename=paste(plot.dir, paste0("HM_", y, "_ON_Old_vs_Young.png"), sep = "/"),
units = "in", width = 5, height = 6, res = 300)
draw(dimicco_hm, merge_legend = TRUE,heatmap_legend_side = 'top',column_title =y)
dev.off()
} else {
png(filename=paste(plot.dir, paste0("HM_", y, "_ON_Old_vs_Young.png"), sep = "/"),
units = "in", width = 4, height = 4, res = 300)
draw(dimicco_hm, merge_legend = TRUE,heatmap_legend_side = 'top',column_title =y)
dev.off()
}
}
```
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment