Commit 1f735baa authored by Matteo Barcella's avatar Matteo Barcella
Browse files

scRNAseq realted scripts

parent 01d2c18d
# Run monocle analysis
library(Seurat)
library(monocle3)
source('~/Scripts/Monocle/RunMono_basic_from_seurat.R')
library(ggplot2)
aml_dx <- readRDS(file = "/TIGET/scRNA_AML_harmonyzations/Harmony_by_patient_chem_AMLnoWTs/AMLs_har.by.Patient.Chemistry_noWTs_DX.rds")
begin_vector_cd34 <- Cells(subset(x = aml_dx, subset = CD34 > 0))
# Pseudotime analysis with
cds_from_seurat <- list()
cds_from_seurat[["DX_beginCD34pos"]] <- run.mono3.from.seurat(seurat.obj = aml_dx, cdsname = "BeginCD34_pos_cells",
clustername = "RNA_snn_res.0.6",
outfolder = "Monocle3_by_seurat/BeginCD34pos/",
vars.vector = c("Classification", "main_bpe",
"pseudotime", "partition","RNA_snn_res.0.6",
"orig.ident", "PatientID", "GFP_LvsH_full_up"),
begin.cells.vector = begin_vector_cd34)
# re-building images
cd34pos <- readRDS("Monocle3_by_seurat/BeginCD34pos/BeginCD34_pos_cells_cds_from_seurat_monocle.v3.rds")
a <- plot_cells(cds = cd34pos,
color_cells_by = "pseudotime",
label_groups_by_cluster=FALSE, trajectory_graph_segment_size = 0.4,
label_leaves=FALSE, cell_size = 0.2,
graph_label_size = 1,
trajectory_graph_color = "black",
label_branch_points=FALSE)
saveRDS(object = a, file = "DX_Pseudotime_from_seurat_CD34pos.rds")
a <- a + theme_void() +
theme(axis.title = element_blank(), legend.position = "top", legend.key.size = unit(x = 5, "mm"),
axis.line = element_blank(), legend.title = element_blank(), legend.text = element_text(size = 7),
plot.title = element_text(hjust = 0.5, size = 10)) +
ggtitle("Pseudotime")
png(filename = "DX_Pseudotime_from_seurat_CD34pos.png", width = 3, height = 3.3, units = "in", res = 1200)
print(a)
dev.off()
b <- plot_cells(cds = cd34pos,
color_cells_by = "pseudotime",
label_groups_by_cluster=FALSE, trajectory_graph_segment_size = 0.4,
label_leaves=FALSE, cell_size = 0.2,
graph_label_size = 1,
trajectory_graph_color = "black",
label_branch_points=FALSE)
saveRDS(object = b, file = "DX_Pseudotime_from_seurat_CD34pos_noLEGEND.rds")
b <- b + theme_void() +
theme(axis.title = element_blank(), legend.position = "none", legend.key.size = unit(x = 5, "mm"),
axis.line = element_blank(), legend.title = element_blank(), legend.text = element_text(size = 7),
plot.title = element_text(hjust = 0.5, size = 10)) +
ggtitle("Pseudotime")
png(filename = "DX_Pseudotime_from_seurat_CD34pos_noLEGEND.png", width = 3, height = 3.3, units = "in", res = 1200)
print(b)
dev.off()
run.mono3.from.seurat <- function(seurat.obj = NULL, cdsname = "mycds",
clustername = "RNA_snn_res.1.2", outfolder = "Monocle3/",
vars.vector = c("orig.ident","Phase"),
begin.cells.vector = NULL,
begin.cells.profile = c("Phase","G2M"),
pre.cds = NULL, only.plot = FALSE){
library(monocle3)
library(plotly)
library(htmlwidgets)
library(dplyr)
# create output folder
dir.create(path = outfolder, showWarnings = T, recursive = T)
# a helper function to identify the root principal points:
get_earliest_principal_node <- function(cds, myvar.cat, myvar.value, mycells = NULL){
if(is.null(mycells)){
cell_ids <- which(colData(cds)[, myvar.cat] == myvar.value)
}else{
cell_ids <- mycells
}
closest_vertex <-
cds@principal_graph_aux[["UMAP"]]$pr_graph_cell_proj_closest_vertex
closest_vertex <- as.matrix(closest_vertex[colnames(cds), ])
root_pr_nodes <-
igraph::V(principal_graph(cds)[["UMAP"]])$name[as.numeric(names
(which.max(table(closest_vertex[cell_ids,]))))]
root_pr_nodes
}
myout <- base::normalizePath(path = outfolder)
if(isTRUE(only.plot) & is.null(pre.cds)){
stop("Please set your cds already processed including pseudotime")
}
if(isFALSE(only.plot) & is.null(pre.cds)){
gene_annotation <- as.data.frame(rownames(seurat.obj@reductions[["pca"]]@feature.loadings),
row.names = rownames(seurat.obj@reductions[["pca"]]@feature.loadings))
colnames(gene_annotation) <- "gene_short_name"
cell_metadata <- as.data.frame(seurat.obj@meta.data,
row.names = base::rownames(seurat.obj@meta.data))
New_matrix <- seurat.obj@assays[["RNA"]]@counts
New_matrix <- New_matrix[rownames(seurat.obj@reductions[["pca"]]@feature.loadings), ]
expression_matrix <- New_matrix
# building simple cds from seurat data
cds_from_seurat <- new_cell_data_set(expression_matrix,
cell_metadata = cell_metadata,
gene_metadata = gene_annotation)
saveRDS(object = cds_from_seurat, file = paste0(myout, "/",cdsname,"_minimal_cds_monocle.v3.rds"))
# Fill partitions field
recreate.partition <- c(rep(1, length(cds_from_seurat@colData@rownames))) # place partition equal to 1.
names(recreate.partition) <- cds_from_seurat@colData@rownames
recreate.partition <- as.factor(recreate.partition)
cds_from_seurat@clusters@listData[["UMAP"]][["partitions"]] <- recreate.partition
# setting default ident as clustername defined as parameter
seurat.obj <- SetIdent(object = seurat.obj, value = clustername)
list_cluster <- seurat.obj@active.ident
names(list_cluster) <- seurat.obj@assays[["RNA"]]@data@Dimnames[[2]]
# Fill required fields
cds_from_seurat@clusters@listData[["UMAP"]][["clusters"]] <- list_cluster
cds_from_seurat@clusters@listData[["UMAP"]][["louvain_res"]] <- "NA"
# set cds umap embedding from seurat embeddings 1:1 correspondence
cds_from_seurat@reducedDims@listData[["UMAP"]] <- seurat.obj@reductions[["umap"]]@cell.embeddings
# set gene loadings
cds_from_seurat@preprocess_aux$gene_loadings <- seurat.obj@reductions[["pca"]]@feature.loadings
# learn graph and do not use partititions (was set to 1 because clustering is not performed by monocle)
cds_from_seurat <- learn_graph(cds_from_seurat, use_partition = F)
# order cells and calculate pseudotime:
pseudo_starting_cells <- get_earliest_principal_node(cds = cds_from_seurat,
myvar.cat = begin.cells.profile[1],
myvar.value = begin.cells.profile[2],
mycells = begin.cells.vector)
cds_from_seurat <- order_cells(cds = cds_from_seurat, root_pr_nodes = pseudo_starting_cells)
for(myvar in vars.vector){
png(filename = paste0(myout, "/",cdsname, "_trajectory_by_", myvar ,".png"),
width = 12, height = 9, units = "in", res = 300)
print(plot_cells(cds_from_seurat,
color_cells_by = myvar,
label_groups_by_cluster=FALSE,
label_leaves=FALSE,
label_branch_points=TRUE,
graph_label_size=2, group_label_size = 6) +
theme(axis.title = element_blank(), axis.text = element_text(size=10),
legend.position = "left", plot.title = element_text(size=18, hjust = 0.5),
plot.subtitle = element_text(size=12, hjust = 0.5)) +
ggtitle(label = paste0(cdsname)))
dev.off()
}
saveRDS(object = cds_from_seurat, file = paste0(myout, "/",cdsname,"_cds_from_seurat_monocle.v3.rds"))
return(cds_from_seurat)
}
if(isTRUE(only.plot) & !is.null(pre.cds)){
for(myvar in vars.vector){
png(filename = paste0(myout, "/",cdsname, "_trajectory_", cdsname, "_by_", myvar ,".png"),
width = 12, height = 9, units = "in", res = 300)
print(plot_cells(cds = pre.cds,
color_cells_by = myvar,
label_groups_by_cluster=FALSE,
label_leaves=FALSE,
label_branch_points=TRUE,
graph_label_size=2, group_label_size = 6) +
theme(axis.title = element_blank(), axis.text = element_text(size=10),
legend.position = "left", plot.title = element_text(size=18, hjust = 0.5),
plot.subtitle = element_text(size=12, hjust = 0.5)) +
ggtitle(label = paste0(cdsname)))
dev.off()
}
}
}
library(Seurat)
library(ggplot2)
library(ComplexHeatmap)
library(dplyr)
library(dittoSeq)
library(ggnewscale)
library(readxl)
Del7_active <- readRDS("Del7_active.rds")
# FIGURE 1
###############
# UMAPplot Del7 No REL
Del7_active$RNA_snn_h.orig.ident_res.0.6 <- factor(Del7_active$RNA_snn_h.orig.ident_res.0.6,
levels = c("0","1","2","3","5","4",
"8","7","6","9","10","11"))
png(filename = "Figure_1E_left.png",
width = 3, height = 3.3, units = "in", res = 900)
DimPlot(Del7_active, group.by = "RNA_snn_h.orig.ident_res.0.6", label = T, label.size = 4,
cols = c(brewer.pal(9, "Set1"), brewer.pal(4, "Set2"))) + theme_void() +
ggtitle("del(7) AML blasts") + theme(plot.title = element_text(size = 11, hjust = 0.5), legend.position = "none")
dev.off()
# DEL7 NO REL Density umaps single patients
data_full <- FetchData(Del7_active, vars = c("RNA_PatientID","RNA_Timepoint",
"AML_up_full","AML_down_full","AML_up_refined",
"RNA_snn_h.orig.ident_res.0.6","UMAPh_1","UMAPh_2"))
PT11AML <- data_full %>% filter(RNA_PatientID == "PT11")
PT17AML <- data_full %>% filter(RNA_PatientID == "PT17")
PT18AML <- data_full %>% filter(RNA_PatientID == "PT18")
UMAPcoords_DXonly <- filter(data_full, RNA_Timepoint == "DX")
UMAPcoords_DXonly$Category <- ifelse(UMAPcoords_DXonly$RNA_PatientID %in% c("PT11", "PT17"), "Refractory",
ifelse(UMAPcoords_DXonly$RNA_PatientID %in% c("PT18"), "postCTX_Rel","Error"))
UMAPcoords_DXnoPT <- select(UMAPcoords_DXonly, -RNA_PatientID)
png("Fig1E_right.png",
res = 700, width = 6.6, height = 3, units = "in")
ggplot(data = UMAPcoords_DXonly, aes(x = UMAPh_1, y = UMAPh_2)) +
geom_point(data = UMAPcoords_DXnoPT, color = "lightgrey", alpha = 0.2, size = 0.1) +
geom_point(aes(color = Category), alpha = 1, size = 0.1) +
scale_color_manual(values = c("#ed7d31","#c00000")) +
new_scale_color() + new_scale_fill() +
stat_density_2d(data = UMAPcoords_DXonly %>% filter(Category == "postCTX_Rel"), aes(color= ..level.., alpha=..level..)) +
stat_density_2d(data = UMAPcoords_DXonly %>% filter(Category == "postCTX_Rel"), aes(fill= ..density.., alpha=..density..), geom = "tile", contour = F) +
scale_color_gradient(low = "#FFFFFF", high = "#5e3213") +
scale_fill_gradient(low = "#FFFFFF", high = "#5e3213") +
new_scale_color() + new_scale_fill() +
stat_density_2d(data = UMAPcoords_DXonly %>% filter(Category == "Refractory"), aes(color= ..level.., alpha=..level..)) +
stat_density_2d(data = UMAPcoords_DXonly %>% filter(Category == "Refractory"), aes(fill= ..density.., alpha=..density..), geom = "tile", contour = F) +
scale_color_gradient(low = "#FFFFFF", high = "#4c0000") +
scale_fill_gradient(low = "#FFFFFF", high = "#4c0000") +
facet_wrap(~RNA_PatientID, ncol = 5) +
theme_void() +
labs(title = "Diagnosis AML blasts") +
theme(legend.position = "none",
plot.title = element_text(size = rel(1.4), hjust = 0.5),
strip.text.x = element_text(size = rel(1.3)))
dev.off()
# FP 126 SIGNATURE
png(filename = "Figure_6A_bottom.png", width = 2.2, height = 3.3, units = "in", res = 700)
ggplot(data_full %>% filter(RNA_Timepoint == "DX") %>% arrange(AML_up_full))+
geom_point(aes(x = UMAPh_1, y = UMAPh_2, color = `AML_up_full`), size =0.01) +
scale_color_viridis_c(option = "inferno", limits = c(-0.1,max(Del7_active$AML_up_full))) +
guides(color = guide_colorbar(reverse = F)) + theme_void() +
labs(title = "126High signature") +
theme(legend.position = "top", legend.title = element_blank(), plot.title = element_text(hjust = 0.5),
legend.key.width=unit(0.33,"in"), legend.key.height = unit(0.2, "in"))
dev.off()
# FIGURE 2
###############
# Density plots of Del7 No REL D14 and D30 blasts
Del7_activedata <- FetchData(object = Del7_active, vars = c("UMAPh_1","UMAPh_2", "RNA_Timepoint"))
png(filename = "Figure_2D.png", width = 3, height = 3.3, units = "in", res = 900)
ggplot(data = Del7_activedata, aes(x = UMAPh_1, y = UMAPh_2)) +
geom_point(data = Del7_activedata %>% filter(RNA_Timepoint == "DX"), color = "lightgrey", alpha = 0.4, size =0.4) +
geom_point(data = Del7_activedata %>% filter(RNA_Timepoint == "D14"), color = "black", size =0.4) +
new_scale_color() + new_scale_fill() +
stat_density_2d(data = Del7_activedata %>% filter(RNA_Timepoint == "D14"), aes(color= ..level.., alpha=..level..)) +
stat_density_2d(data = Del7_activedata %>% filter(RNA_Timepoint == "D14"), aes(fill= ..density.., alpha=..density..), geom = "tile", contour = F) +
scale_color_gradient(low = "#FFFFFF", high = "black") +
scale_fill_gradient(low = "#FFFFFF", high = "black") +
theme_void() +
labs(title = "Day 14 post induction") +
theme(legend.position = "none",
plot.title = element_text(size = rel(1.4), hjust = 0.5), plot.subtitle = element_text(hjust = 0.5))
dev.off()
png(filename = "Figure_2E.png", width = 3, height = 3.3, units = "in", res = 900)
ggplot(data = Del7_activedata, aes(x = UMAPh_1, y = UMAPh_2)) +
geom_point(data = Del7_activedata %>% filter(RNA_Timepoint == "DX"), color = "lightgrey", alpha = 0.4, size =0.4) +
geom_point(data = Del7_activedata %>% filter(RNA_Timepoint == "D30"), color = "black", size =0.4) +
new_scale_color() + new_scale_fill() +
stat_density_2d(data = Del7_activedata %>% filter(RNA_Timepoint == "D30"), aes(color= ..level.., alpha=..level..)) +
stat_density_2d(data = Del7_activedata %>% filter(RNA_Timepoint == "D30"), aes(fill= ..density.., alpha=..density..), geom = "tile", contour = F) +
scale_color_gradient(low = "#FFFFFF", high = "black") +
scale_fill_gradient(low = "#FFFFFF", high = "black") +
theme_void() +
labs(title = "Day 30 post induction") +
theme(legend.position = "none",
plot.title = element_text(size = rel(1.4), hjust = 0.5), plot.subtitle = element_text(hjust = 0.5))
dev.off()
####################
SIG_FINAL <- read_excel("SIG_FINAL.xlsx")
DGE <- read_excel("NComm_Supplementary_Table3_PDX_BulkRNAseq_DEG.xlsx", sheet = "LvsH")
LymphGO_genes = SIG_FINAL$LYMPH_GO
HSC_LSC_genes = unique(c(SIG_FINAL$JAATINEN_HEMATOPOIETIC_STEM_CELL_UP,
SIG_FINAL$EPPERT_HSC_R, SIG_FINAL$EPPERT_CE_HSC_LSC, SIG_FINAL$EPPERT_LSC_R))
DGE_UP_LVSH <- DGE %>% filter(logFC>0, FDR<0.05)
LymphGO_genes_in_sig_fixed = intersect(LymphGO_genes, DGE_UP_LVSH$Gene)
HSC_LSC_genes_in_sig_fixed = intersect(HSC_LSC_genes, DGE_UP_LVSH$Gene)
Del7_active = AddModuleScore(object = Del7_active, features = list(LymphGO_genes_in_sig_fixed), name = "LymphGO_genes_in_sig_fixed", seed = 123,
nbin = 100, ctrl = 25) # QUESTO QUA
Del7_active$LymphGO_genes_in_sig_fixed = Del7_active$LymphGO_genes_in_sig_fixed1
Del7_active$LymphGO_genes_in_sig_fixed1 = NULL
Del7_active = AddModuleScore(object = Del7_active, features = list(HSC_LSC_genes_in_sig_fixed), name = "HSC_LSC_genes_in_sig_fixed", seed = 123,
nbin = 100, ctrl = 25) # QUESTO QUA
Del7_active$HSC_LSC_genes_in_sig_fixed = Del7_active$HSC_LSC_genes_in_sig_fixed1
Del7_active$HSC_LSC_genes_in_sig_fixed1 = NULL
FeatureScatter(Del7_active, feature1 = "HSC_LSC_genes_in_sig_fixed", feature2 = "LymphGO_genes_in_sig_fixed")
LSC_lymph_corr = FetchData(Del7_active, vars = c("HSC_LSC_genes_in_sig_fixed", "LymphGO_genes_in_sig_fixed", "RNA_Timepoint", "RNA_PatientID"))
LSC_lymph_corr_dx = LSC_lymph_corr %>% filter(RNA_Timepoint == "DX")
library(psych)
Spearman_Corr_Ly_LSC = corr.test(x = LSC_lymph_corr_dx$HSC_LSC_genes_in_sig_fixed,
y = LSC_lymph_corr_dx$LymphGO_genes_in_sig_fixed,
method = "spearman", adjust = "bonferroni", ci = F)
Spearman_Corr_Ly_LSC_data = as.data.frame(cbind(Spearman_Corr_Ly_LSC[["r"]],Spearman_Corr_Ly_LSC[["p"]]))
colnames(Spearman_Corr_Ly_LSC_data) <- c("Spearman_rho", "Bonf_p_adj")
Spearman_Corr_Ly_LSC_data
# Spearman_rho Bonf_p_adj
# 0.6144857 0
LSC_lymph_corr_dx$Outcome_categories <- ifelse(LSC_lymph_corr_dx$RNA_PatientID %in% c("PT11","PT17"), "Chemorefractory", "Post_chemo_relapse")
png(filename = "Figure_S6_right.png",
height = 3.3, width = 3, res = 700, units = "in")
ggplot(LSC_lymph_corr_dx %>% filter(RNA_Timepoint == "DX"), aes(x = HSC_LSC_genes_in_sig_fixed, y = LymphGO_genes_in_sig_fixed,
color = Outcome_categories)) +
geom_point(stat = "identity", size = 0.2) + theme_bw() +
scale_color_manual(values = c(Chemorefractory = "#bf0000",
Persistent_CR = "#6eab45", Post_chemo_relapse = "#eb7d30")) +
theme(panel.grid = element_blank(), plot.subtitle = element_text(hjust = 0.5, size = 8), plot.title = element_text(hjust = 0.5)) +
NoLegend() +
xlab("LSC gene subset") + ylab("Lymphoid gene subset") + ggtitle("126High Sig. subsets",
subtitle = "Spearman rho = 0.6, Bonf. p.adj <0.001")
dev.off()
# FIGURE 6
###################
###################
# VlnPlot 126High
###################
Del7_active <- SetIdent(Del7_active, value = "RNA_Timepoint")
png(filename = "Figure_6B_right.png",
width = 2.2, height = 3.3, units = "in", res = 300)
VlnPlot(Del7_active, idents = "DX", features = "AML_up_full",
group.by = "RNA_PatientID", pt.size = 0,
cols = c("#c00000","#c00000","#ed7d31")) +
geom_hline(yintercept=0, linetype="dashed", color = "black", alpha = 0.5) + ylim(-0.1,0.07) +
theme_bw(base_size = 6) + ggtitle("126High Signature") +
theme(legend.text=element_text(size=rel(1.5)), legend.position = "none") +
theme(plot.title = element_text(size = rel(2), hjust = 0.5),
axis.title.x = element_blank(), axis.text = element_text(size = rel(2)),
panel.grid.major.x = element_blank(), panel.grid.minor = element_blank())
dev.off()
# Percent positive cells at diagnosis
round(prop.table(table(Del7_active$AML_up_full>0.001, Del7_active$RNA_PatientID, Del7_active$RNA_Timepoint)[,,"DX"],
margin = 2)*100, digits = 2)
# PT11 PT17 PT18
# FALSE 806 4927 3834
# TRUE 5859 2997 2013
# PT11 PT17 PT18
# FALSE 12.09 62.18 65.57
# TRUE 87.91 37.82 34.43
# VLNPLOT with PT19 and PT20 module scores computed on the full dataset
#####################
# ALL diagnosis vs Relapses single patients
#####################
AML <- readRDS("AML_object.rds")
# Create unique dataset merging all
PT20_active <- readRDS("PT20_active.rds")
PT19_active <- readRDS("PT19_active.rds")
PT19data <- FetchData(PT19_active, vars = c("RNA_PatientID", "RNA_Timepoint", "AML_up_full"))
PT19data$Outcome_cat <- "Post_chemo_relapse"
PT20data <- FetchData(PT20_active, vars = c("RNA_PatientID", "RNA_Timepoint", "AML_up_full"))
PT20data$Outcome_cat <- "Post_chemo_relapse"
AMLdata <- FetchData(AML,vars = c("PatientID", "Timepoint_corr", "Patient_LvsH_FULL_UP_fixed", "Outcome_categories"))
colnames(AMLdata) <- colnames(PT20data)
ALL <- rbind(AMLdata, PT19data, PT20data)
DiagnosisVLN <- ALL %>% filter(RNA_Timepoint == "DX")
DiagnosisVLN$Facet <- "Diagnosis"
DiagnosisVLNsubset <- DiagnosisVLN %>% group_by(RNA_PatientID) %>% sample_n(1400) %>% ungroup()
DiagnosisVLNsubset$RNA_Timepoint <- ifelse(DiagnosisVLNsubset$Outcome_cat == "Chemorefractory", "REFR",
ifelse(DiagnosisVLNsubset$Outcome_cat == "Persistent_CR", "CR","fut_REL"))
RelapseVLN <- ALL %>% filter(RNA_Timepoint == "REL")
RelapseVLN$Facet <- "Relapse"
RelapseVLNsubset <- RelapseVLN %>% group_by(RNA_PatientID) %>% sample_n(1200) %>% ungroup()
PT08VLN <- ALL %>% filter(RNA_PatientID == "PT08" & RNA_Timepoint %in% c("DX", "REL", "REL_NR"))
PT08VLN$Facet <- "PT08"
PT15VLN <- ALL %>% filter(RNA_PatientID == "PT15")
PT15VLN$Facet <- "PT15"
PT19VLN <- ALL %>% filter(RNA_PatientID == "PT19" & RNA_Timepoint == "REL")
PT19VLN$Facet <- "PT19"
PT20VLN <- ALL %>% filter(RNA_PatientID == "PT20" & RNA_Timepoint == "REL1")
PT20VLN$Facet <- "PT20"
PT20VLN$RNA_Timepoint <- "REL"
VLN <- rbind(RelapseVLNsubset, DiagnosisVLNsubset, PT08VLN, PT15VLN, PT19VLN, PT20VLN)
VLN$Facet <- factor(VLN$Facet, levels = c("Diagnosis", "Relapse", "PT08", "PT15", "PT19", "PT20"))
VLN$RNA_Timepoint <- factor(VLN$RNA_Timepoint, levels = c("CR","fut_REL","REFR" , "DX", "REL", "REL_NR"))
# All diagnosis together
VLN_nooutcome <- VLN
VLN_nooutcome$RNA_Timepoint <- ifelse(VLN_nooutcome$RNA_Timepoint %in% c("CR", "fut_REL", "REFR"), "DX",
paste0(VLN_nooutcome$RNA_Timepoint))
# Diagnosis WITHOUT REFR AML
VLNnoREFR <- VLN
VLNnoREFR <- VLNnoREFR %>% filter(!RNA_PatientID %in% c("PT01", "PT02", "PT13"))
VLNnoREFR$RNA_Timepoint <- ifelse(VLNnoREFR$RNA_Timepoint %in% c("CR", "fut_REL", "REFR"), "DX",
paste0(VLNnoREFR$RNA_Timepoint))
VLNnoREFR_noRELpool <- VLNnoREFR
VLNnoREFR_noRELpool <- VLNnoREFR_noRELpool %>% filter(!Facet == "Relapse")
VLNnoREFR_noRELpool$Facet <- ifelse(VLNnoREFR_noRELpool$Facet == "Diagnosis", "non Ref. \n AML", paste0(VLNnoREFR_noRELpool$Facet))
svg(filename = "Figure_5F.svg", height = 3.3,
width = 4.4)
ggplot(VLNnoREFR_noRELpool) +
geom_violin(aes(x = RNA_Timepoint, y = AML_up_full, fill = RNA_Timepoint)) +
facet_grid(~Facet, scales = "free_x", space = "free_x") +
geom_hline(yintercept=0, linetype="dashed", color = "black", alpha = 0.5) +
scale_fill_manual(values = c(CR = "#70ad47", fut_REL = "#ed7d31", REFR = "#c00000",
DX = "#cfcfcf", REL = "#458fe8", REL_NR = "#19385c")) +
theme_bw(base_size = 6) + ggtitle("126High Signature") +
theme(legend.text=element_text(size=rel(1.5)), legend.position = "none") +
theme(plot.title = element_text(size = rel(2), hjust = 0.5), axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text = element_text(size = rel(1.5)), strip.text = element_text(size = rel(2)),
panel.grid.minor = element_blank(), panel.grid.major.x = element_blank(), strip.background = element_blank())
dev.off()
###################
#Featureplots LSC17
#################################
lsc_signatures_scale <- FetchData(Del7_active,
vars = c("DNMT3B","ZBTB46","NYNRIN","ARHGAP22",
"LAPTM4B","MMRN1","DPYSL3","FAM30A",
"CDK6","CPXM1", "SOCS2","SMIM24",
"EMP1","BEX3","CD34","AKR1C3","ADGRG1"),
slot = "scale.data")
lsc_signatures_scale$LSC17 <- lsc_signatures_scale$DNMT3B*0.0874 +
lsc_signatures_scale$ZBTB46*(-0.0347) +
lsc_signatures_scale$NYNRIN*0.00865 +
lsc_signatures_scale$ARHGAP22*(-0.0138) +
lsc_signatures_scale$LAPTM4B*0.00582 +
lsc_signatures_scale$MMRN1*0.0258 +
lsc_signatures_scale$DPYSL3*0.0284 +
lsc_signatures_scale$FAM30A*0.0196 +
lsc_signatures_scale$CDK6*(-0.0704) +
lsc_signatures_scale$CPXM1*(-0.0258) +
lsc_signatures_scale$SOCS2*0.0271 +
lsc_signatures_scale$SMIM24*(-0.0226) +
lsc_signatures_scale$EMP1*0.0146 +
lsc_signatures_scale$BEX3*0.0465 +
lsc_signatures_scale$CD34*0.0338 +
lsc_signatures_scale$AKR1C3*(-0.0402) +
lsc_signatures_scale$ADGRG1*0.0501
Del7_active <- AddMetaData(object = Del7_active, metadata = lsc_signatures_scale$LSC17, col.name = "LSC17")
data_full <- FetchData(Del7_active, vars = c("RNA_Timepoint", "LSC17", "UMAPh_1", "UMAPh_2", "AML_up_full"))
png(filename = "Figure_S6C_right.png", width = 2.2, height = 3.3, units = "in", res = 700)
ggplot(data_full %>% filter(RNA_Timepoint == "DX") %>% arrange(LSC17))+
geom_point(aes(x = UMAPh_1, y = UMAPh_2, color = `LSC17`), size =0.01) +
scale_color_viridis_c(option = "inferno", limits = c(min(Del7_active$LSC17),max(Del7_active$LSC17))) +
guides(color = guide_colorbar(reverse = F)) + theme_void() +
labs(title = "LSC17") +
theme(legend.position = "top", legend.title = element_blank(), plot.title = element_text(hjust = 0.5),
legend.key.width=unit(0.33,"in"), legend.key.height = unit(0.2, "in"))
dev.off()
library(dittoSeq)
AML <- SetIdent(AML, value = "RNA_snn_res.0.6")
dittoBarPlot(AML, group.by = "RNA_snn_res.0.6", var = "Timepoint_corr", cells.use = WhichCells(AML, idents = "7"))
dittoBarPlot(AML, group.by = "RNA_snn_res.0.6", var = "PatientID", cells.use = WhichCells(AML, idents = "7"))
dittoBarPlot(AML, group.by = "PatientID", var = "Timepoint_corr", cells.use = WhichCells(AML, idents = "7"))
# Figure 6
###################
# Dotplot
###################
Del7_active <- SetIdent(Del7_active, value = "RNA_snn_h.orig.ident_res.0.6")
plotinfo = dittoSeq::dittoDotPlot(object = Del7_active, vars = c("JCHAIN", "LTB", "DNTT", "CD36", "CD34",
"CD99", "TNFRSF4", "BTG1", "GUCY1A1", "FAM30A", "CDK6", "INKA1", "AML_up_full"),
group.by = "RNA_snn_h.orig.ident_res.0.6",
ylab = "LSC clusters",
data.out = T, cells.use = WhichCells(Del7_active, idents = c("10","0","1","5","9")))
png(filename = "Figure_6F.png", width = 9.9, height = 6, units = "in", res=300)
ggplot(data = plotinfo$data, aes(x = var, y = grouping)) +
geom_point(aes(color = color, size = size)) +
scale_color_gradient2(high = "#b31b2c", mid = "white", low = "#2268ad") +
theme_bw() + theme(panel.grid = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = rel(1.25)),
axis.text.y = element_text(size = rel(1.5))) +
xlab("Top 15 cluster markers by logFC") + ylab("LSC subclusters") +
labs(color = paste("Relative","expression",sep="\n"), size = paste("Percent","expression",sep="\n"))
dev.off()
# Creating GSEA plot - paper
# Loading libraries
library(reshape2)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(gplots)
library(pheatmap)
library(Seurat)
# Step 1. Melt data in order to combine easily all the dataframes
gsea_df_d14_d30_vs_dx <- readRDS("Figure_2C_6I_d14d30vsdx_data.rds")
gsea_df_rel_vs_dx <- readRDS("Figure_2C_6I_relvsdx_pt0815_data.rds")
gsea_df_relnr_vs_rel <- readRDS("Figure_2C_6I_relnrvsrel_pt08_data.rds")
gsea_df <- append(gsea_df_d14_d30_vs_dx,gsea_df_rel_vs_dx)
gsea_df <- append(gsea_df,gsea_df_relnr_vs_rel)
gsea_df_melt <- list()
counter <- 1
names(gsea_df) <- gsub(pattern = "cluster_", replacement = "cl", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "_vs_DX", replacement = "vsDX", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "RELNR_vs_REL", replacement = "RELNRvsREL", x = names(gsea_df))
for (i in names(gsea_df)) {
comparison <- unlist(strsplit(x = i, split = "_"))[1]
cluster <- unlist(strsplit(x = i, split = "_"))[2]
patient <- unlist(strsplit(x = i, split = "_"))[3]
keyval <- i
gsea_df[[i]]$comparison <- comparison
gsea_df[[i]]$cluster <- cluster
gsea_df[[i]]$patient <- patient
gsea_df[[i]]$keyval <- keyval
gsea_df_melt[[i]] <- melt(data = gsea_df[[i]], id.vars = c("ID", "comparison","patient", "keyval", "cluster", "p.adjust"),
measure.vars = c("NES"))
gsea_df_melt[[i]]$value <- round(gsea_df_melt[[i]]$value, digits = 2)
counter <- counter + 1
}
total <- bind_rows(gsea_df, .id = "column_label")
total_melt <- bind_rows(gsea_df_melt, .id = "column_label")
total_melt_filtered <- subset(x = total_melt, subset = p.adjust < 0.1)
myabs <- max(abs(x = total_melt$value))
total_melt_filtered_back <- total_melt_filtered
total_melt_filtered$ID <- as.factor(total_melt_filtered$ID)
total_melt_filtered$comparison <- as.factor(total_melt_filtered$comparison)
total_melt_filtered$patient <- as.factor(total_melt_filtered$patient)
total_melt_filtered$cluster <- as.factor(total_melt_filtered$cluster)
uu <- melt(data = dcast(data = total_melt_filtered, formula = ID ~ keyval, value.var = 'value'), id.vars = 'ID')
for (i in 1:nrow(uu)) {
uu$comparison[i] <- unlist(strsplit(x = as.character(uu$variable)[i], split = "_"))[1]
uu$cluster[i] <- unlist(strsplit(x = as.character(uu$variable)[i], split = "_"))[2]
uu$patient[i] <- unlist(strsplit(x = as.character(uu$variable)[i], split = "_"))[3]
}
uu$ID <- as.factor(uu$ID)
uu$comparison <- as.factor(uu$comparison)
uu$cluster <- as.factor(uu$cluster)
uu$patient <- as.factor(uu$patient)
dall <- uu
dall$cluster <- droplevels(dall$cluster)
# 1. drop levels of variable and comparison
dall$comparison <- droplevels(dall$comparison)
dall$variable <- droplevels(dall$variable)
# 2. reorder variable based on cluster factor order
dall$variable <- factor(x = dall$variable,
levels = unique(as.character(dall[ order(dall$cluster), ]$variable)))
dall_copy <- dall
######## Custom order of IDs and comparisons #######
dall$ID <- gsub(x = dall$ID, pattern = "HALLMARK_", replacement = "")
terms_order <- c("HEDGEHOG_SIGNALING",
"MTORC1_SIGNALING",
"KRAS_SIGNALING_DN",
"KRAS_SIGNALING_UP",
"WNT_BETA_CATENIN_SIGNALING",
"TGF_BETA_SIGNALING",
"PI3K_AKT_MTOR_SIGNALING",
"EPITHELIAL_MESENCHYMAL_TRANSITION",
"APICAL_JUNCTION",
"APICAL_SURFACE",
"COAGULATION",
"COMPLEMENT",
"ALLOGRAFT_REJECTION",
"IL2_STAT5_SIGNALING",
"IL6_JAK_STAT3_SIGNALING",
"INFLAMMATORY_RESPONSE",
"INTERFERON_GAMMA_RESPONSE",
"INTERFERON_ALPHA_RESPONSE",
"TNFA_SIGNALING_VIA_NFKB",
"E2F_TARGETS",
"G2M_CHECKPOINT",
"MITOTIC_SPINDLE",
"MYC_TARGETS_V2",
"MYC_TARGETS_V1",
"APOPTOSIS",
"UV_RESPONSE_DN",
"UV_RESPONSE_UP",
"P53_PATHWAY",
"DNA_REPAIR",
"UNFOLDED_PROTEIN_RESPONSE",
"PEROXISOME",
"GLYCOLYSIS",
"HEME_METABOLISM",
"HYPOXIA",
"REACTIVE_OXYGEN_SPECIES_PATHWAY",
"OXIDATIVE_PHOSPHORYLATION",
"ADIPOGENESIS",
"ANGIOGENESIS",
"CHOLESTEROL_HOMEOSTASIS",
"ESTROGEN_RESPONSE_EARLY",
"ESTROGEN_RESPONSE_LATE",
"ANDROGEN_RESPONSE",
"FATTY_ACID_METABOLISM",
"XENOBIOTIC_METABOLISM")
# NOT PRESENT IN FINAL IMAGE:
# [1] "MYOGENESIS" "SPERMATOGENESIS"
dall <- dall[!dall$ID %in% c("MYOGENESIS","SPERMATOGENESIS",
"BILE_ACID_METABOLISM",
"NOTCH_SIGNALING",
"PROTEIN_SECRETION"),]
dall$ID <- factor(dall$ID, levels = terms_order)
# variable order according to paper
variable_order <- c("D14vsDX_cl0_PT01",
"D14vsDX_cl3_PT01",
"D14vsDX_cl3_PT13",
"D14vsDX_cl3_PT09",
"D14vsDX_cl3_PT06",
"D14vsDX_cl12_PT10",
"D14vsDX_cl9_PT13",
"D14vsDX_cl7_PT01",
"D14vsDX_cl7_PT13",
"D14vsDX_cl7_PT08",
"D14vsDX_cl7_PT07",
"D14vsDX_cl2_PT01",
"D14vsDX_cl2_PT13",
"D14vsDX_cl10_PT01",
"D14vsDX_cl6_PT01",
"D14vsDX_cl6_PT13",
"D14vsDX_cl6_PT10",
"D30vsDX_cl11_PT01",
"D30vsDX_cl11_PT13",
"D30vsDX_cl0_PT01",
"D30vsDX_cl0_PT13",
"D30vsDX_cl3_PT13",
"D30vsDX_cl1_PT13",
"D30vsDX_cl11_PT10",
"D30vsDX_cl3_PT10",
"D30vsDX_cl9_PT13",
"D30vsDX_cl7_PT13",
"D30vsDX_cl9_PT09",
"D30vsDX_cl9_PT12",
"D30vsDX_cl2_PT01",
"D30vsDX_cl2_PT13",
"D30vsDX_cl2_PT09",
"D30vsDX_cl6_PT13",
"D30vsDX_cl6_PT09",
"D30vsDX_cl8_PT08",
"D30vsDX_cl4_PT12",
"RELvsDX_cl11_PT15",
"RELvsDX_cl0_PT08",
"RELvsDX_cl0_PT15",
"RELvsDX_cl3_PT08",
"RELvsDX_cl3_PT15",
"RELvsDX_cl12_PT08",
"RELvsDX_cl1_PT08",
"RELvsDX_cl1_PT15",
"RELvsDX_cl9_PT08",
"RELvsDX_cl9_PT15",
"RELvsDX_cl7_PT08",
"RELvsDX_cl7_PT15",
"RELvsDX_cl2_PT08",
"RELvsDX_cl2_PT15",
"RELvsDX_cl10_PT15",
"RELvsDX_cl6_PT15",
"RELvsDX_cl5_PT15",
"RELNRvsREL_cl11_PT08",
"RELNRvsREL_cl0_PT08",
"RELNRvsREL_cl3_PT08",
"RELNRvsREL_cl12_PT08",
"RELNRvsREL_cl1_PT08",
"RELNRvsREL_cl9_PT08",
"RELNRvsREL_cl7_PT08",
"RELNRvsREL_cl2_PT08",
"RELNRvsREL_cl10_PT08"
)
dall$variable <- factor(dall$variable, levels = variable_order)
saveRDS(dall, "dall_ordered_no_grouping.rds")
dall$comparison <- factor(dall$comparison, levels = c("D14vsDX","D30vsDX","RELvsDX","RELNRvsREL"))
#### grouping terms ####
terms2group <- read.table(file = "Term_2_groups.txt", sep =",", header = T)
dall <- merge.data.frame(x = dall, y = terms2group, by.x = "ID", by.y = "TERM", all.x = T)
dall$TGROUP <- factor(x = dall$TGROUP, levels = rev(c("A","B","C","D","E","G")))
comparisons2group <- read.table(file = "Comparison_groups.txt", sep =",", header = T)
dall <- merge.data.frame(x = dall, y = comparisons2group, by.x = "variable", by.y = "COMP", all.x = T)
dall$CGROUP <- factor(x = dall$CGROUP, levels = c("C1","C2","C3","C4","C5","C6","C7","C8"))
c <- ggplot(data = dall, mapping = aes(x = variable, y = ID)) +
facet_grid(TGROUP ~ comparison + CGROUP, scales = "free", space = "free") +
geom_tile(aes(fill=value), width=0.70, height=0.70, color="white", size = 0.30) + labs(fill = "NES") +
ggtitle(label = "GSEA hallmarks", subtitle = " Intracluster - res 0.6 padj < 0.1") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1), strip.text.x = element_text(size = 0),
strip.text.y = element_blank(), strip.background = element_blank(),
plot.title = element_text(hjust = 0), plot.subtitle = element_text(hjust = 0),
axis.ticks = element_blank(), axis.text = element_text(size = 7), axis.title = element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "white")) +
scale_fill_gradient2(na.value = '#ECECED', limits=c(-3.7, 3.7), breaks=seq(-3,3,by=1),
low='#005295',high='#770000', mid = 'white')
svg(filename = "GSEA_hallmarks_intracluster_res.0.6_paper_timepoint_comparisons_padj_0.1.svg",
width = 12, height = 9)
c
dev.off()
png(filename = "GSEA_hallmarks_intracluster_res.0.6_paper_timepoint_comparisons_padj_0.1.png",
width = 12, height = 9, res = 900, units = "in")
c
dev.off()
COMP,CGROUP
D14vsDX_cl0_PT01,C1
D14vsDX_cl3_PT01,C1
D14vsDX_cl3_PT13,C1
D14vsDX_cl3_PT09,C1
D14vsDX_cl3_PT06,C1
D14vsDX_cl12_PT10,C1
D14vsDX_cl9_PT13,C2
D14vsDX_cl7_PT01,C2
D14vsDX_cl7_PT13,C2
D14vsDX_cl7_PT08,C2
D14vsDX_cl7_PT07,C2
D14vsDX_cl2_PT01,C2
D14vsDX_cl2_PT13,C2
D14vsDX_cl10_PT01,C2
D14vsDX_cl6_PT01,C2
D14vsDX_cl6_PT13,C2
D14vsDX_cl6_PT10,C2
D30vsDX_cl11_PT01,C3
D30vsDX_cl11_PT13,C3
D30vsDX_cl0_PT01,C3
D30vsDX_cl0_PT13,C3
D30vsDX_cl3_PT13,C3
D30vsDX_cl1_PT13,C3
D30vsDX_cl11_PT10,C3
D30vsDX_cl3_PT10,C3
D30vsDX_cl9_PT13,C4
D30vsDX_cl7_PT13,C4
D30vsDX_cl9_PT09,C4
D30vsDX_cl9_PT12,C4
D30vsDX_cl2_PT01,C4
D30vsDX_cl2_PT13,C4
D30vsDX_cl2_PT09,C4
D30vsDX_cl6_PT13,C4
D30vsDX_cl6_PT09,C4
D30vsDX_cl8_PT08,C4
D30vsDX_cl4_PT12,C4
RELvsDX_cl11_PT15,C5
RELvsDX_cl0_PT08,C5
RELvsDX_cl0_PT15,C5
RELvsDX_cl3_PT08,C5
RELvsDX_cl3_PT15,C5
RELvsDX_cl12_PT08,C5
RELvsDX_cl1_PT08,C5
RELvsDX_cl1_PT15,C5
RELvsDX_cl9_PT08,C6
RELvsDX_cl9_PT15,C6
RELvsDX_cl7_PT08,C6
RELvsDX_cl7_PT15,C6
RELvsDX_cl2_PT08,C6
RELvsDX_cl2_PT15,C6
RELvsDX_cl10_PT15,C6
RELvsDX_cl6_PT15,C6
RELvsDX_cl5_PT15,C6
RELNRvsREL_cl11_PT08,C7
RELNRvsREL_cl0_PT08,C7
RELNRvsREL_cl3_PT08,C7
RELNRvsREL_cl12_PT08,C7
RELNRvsREL_cl1_PT08,C7
RELNRvsREL_cl9_PT08,C8
RELNRvsREL_cl7_PT08,C8
RELNRvsREL_cl2_PT08,C8
RELNRvsREL_cl10_PT08,C8
\ No newline at end of file
TERM,TGROUP
MTORC1_SIGNALING,A
KRAS_SIGNALING_DN,A
KRAS_SIGNALING_UP,A
WNT_BETA_CATENIN_SIGNALING,A
TGF_BETA_SIGNALING,A
PI3K_AKT_MTOR_SIGNALING,A
EPITHELIAL_MESENCHYMAL_TRANSITION,A
APICAL_JUNCTION,A
APICAL_SURFACE,A
COAGULATION,B
COMPLEMENT,B
ALLOGRAFT_REJECTION,B
IL2_STAT5_SIGNALING,B
IL6_JAK_STAT3_SIGNALING,B
INFLAMMATORY_RESPONSE,B
INTERFERON_GAMMA_RESPONSE,B
INTERFERON_ALPHA_RESPONSE,B
TNFA_SIGNALING_VIA_NFKB,B
E2F_TARGETS,C
G2M_CHECKPOINT,C
MITOTIC_SPINDLE,C
MYC_TARGETS_V2,C
MYC_TARGETS_V1,C
APOPTOSIS,D
UV_RESPONSE_DN,D
UV_RESPONSE_UP,D
P53_PATHWAY,D
DNA_REPAIR,D
UNFOLDED_PROTEIN_RESPONSE,D
PEROXISOME,D
GLYCOLYSIS,E
HEME_METABOLISM,E
HYPOXIA,E
REACTIVE_OXYGEN_SPECIES_PATHWAY,E
OXIDATIVE_PHOSPHORYLATION,E
ADIPOGENESIS,G
CHOLESTEROL_HOMEOSTASIS,G
ESTROGEN_RESPONSE_EARLY,G
ESTROGEN_RESPONSE_LATE,G
ANDROGEN_RESPONSE,G
FATTY_ACID_METABOLISM,G
XENOBIOTIC_METABOLISM,G
\ No newline at end of file
# Creating GSEA plot - paper
# Loading libraries
library(reshape2)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(gplots)
library(pheatmap)
library(Seurat)
del7 <- readRDS("Del7.rds")
del7$TP_CL_PT <- paste(del7$RNA_Timepoint, del7$RNA_snn_h.orig.ident_res.0.6, del7$RNA_PatientID,sep = "_")
del7$TP_CL_PT_fake <- paste0(del7$RNA_Timepoint, "vsDX_cl", del7$RNA_snn_h.orig.ident_res.0.6,"_", del7$RNA_PatientID)
a <- FetchData(object = del7, c("TP_CL_PT_fake", "AML_up_full"))
comp_order <- a %>% group_by(TP_CL_PT_fake) %>% summarise(median_sig = median(AML_up_full)) %>% arrange(desc(median_sig))
comp_order <- comp_order[grep(pattern = "DXvsDX", invert = T, value = F, x = comp_order$TP_CL_PT_fake),]
comp_order <- comp_order$TP_CL_PT_fake
# Del7 - plots
df_list <- list()
for (pt in c("PT11","PT17","PT18")) {
gseadata <- readRDS(paste0("Figure_2F_",pt,"_data.rds"))
for (comparison in names(gseadata)) {
for(cl in names(gseadata[[comparison]])){
clf <- gsub(x = cl, pattern = "_", replacement = "")
df_list[[paste(pt,comparison,clf, sep = "_")]] <- gseadata[[comparison]][[cl]]@result
df_list[[paste(pt,comparison,clf, sep = "_")]]$patient <- pt
df_list[[paste(pt,comparison,clf, sep = "_")]]$cluster <- clf
df_list[[paste(pt,comparison,clf, sep = "_")]]$comparison <- comparison
df_list[[paste(pt,comparison,clf, sep = "_")]]$keyval <- paste(df_list[[paste(pt,comparison,clf, sep = "_")]]$comparison,
df_list[[paste(pt,comparison,clf, sep = "_")]]$cluster,
df_list[[paste(pt,comparison,clf, sep = "_")]]$patient, sep = "_")
}
}
}
gsea_df <- do.call(rbind, df_list)
gsea_df_melt <- melt(data = gsea_df, id.vars = c("ID", "comparison","patient", "keyval", "cluster", "p.adjust"),
measure.vars = c("NES"))
gsea_df_melt <- subset(x = gsea_df_melt, subset = p.adjust < 0.1)
gsea_df_melt_D14_D30 <- subset(x = gsea_df_melt, subset = comparison %in% c("D14vsDX","D30vsDX"))
gsea_filtered <- list(D14_D30vsDX = gsea_df_melt_D14_D30)
terms2group <- read.table(file = "Figure_2F_term2groups.txt", sep =",", header = T)
for(comp in names(gsea_filtered)){
df <- gsea_filtered[[comp]]
df$ID <- as.factor(gsub(df$ID, pattern = "HALLMARK_", replacement = ""))
df$comparison <- as.factor(df$comparison)
df$patient <- as.factor(df$patient)
df$cluster <- as.factor(df$cluster)
df.melt <- melt(data = dcast(data = df, formula = ID ~ keyval, value.var = 'value'), id.vars = 'ID')
for (i in 1:nrow(df.melt)) {
df.melt$comparison[i] <- unlist(strsplit(x = as.character(df.melt$variable)[i], split = "_"))[1]
df.melt$cluster[i] <- unlist(strsplit(x = as.character(df.melt$variable)[i], split = "_"))[2]
df.melt$patient[i] <- unlist(strsplit(x = as.character(df.melt$variable)[i], split = "_"))[3]
}
df.melt$ID <- as.factor(df.melt$ID)
df.melt$comparison <- as.factor(df.melt$comparison)
df.melt$cluster <- as.factor(df.melt$cluster)
df.melt$patient <- as.factor(df.melt$patient)
# dropping levels
df.melt$cluster <- droplevels(df.melt$cluster)
df.melt$comparison <- droplevels(df.melt$comparison)
df.melt$variable <- droplevels(df.melt$variable)
# col order
df.melt$variable <- factor(x = df.melt$variable, levels = comp_order)
# row order
terms_order <- intersect(terms2group$TERM, df.melt$ID)
terms2group <- terms2group[terms2group$TERM %in% terms_order,]
df.melt$ID <- factor(df.melt$ID, levels = terms_order)
df.melt <- merge.data.frame(x = df.melt, y = terms2group, by.x = "ID", by.y = "TERM", all.x = T)
df.melt$TGROUP <- factor(x = df.melt$TGROUP, levels = rev(c("A","B","C","D","E","F","G")))
png(filename = paste0("Del7_GSEA_plot_padj_0.1_",comp,".png"), width = 10, height = 9, units = "in", res = 300)
print(ggplot(data = df.melt, mapping = aes(x = variable, y = ID)) +
facet_grid(TGROUP ~ comparison, scales = "free", space = "free") +
geom_tile(aes(fill=value), width=0.70, height=0.70, color="white", size = 0.30) + labs(fill = "NES") +
ggtitle(label = "GSEA hallmarks", subtitle = "p.adj < 0.1") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
strip.background = element_blank(),
strip.text = element_blank(),
plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5),
axis.ticks = element_blank(), axis.text = element_text(size = 7), axis.title = element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "white")) +
scale_fill_gradient2(na.value = '#ECECED', #limits=c(minval, maxval), breaks=seq(minval,maxval,by=1),
low='#005295',high='#770000', mid = 'white'))
dev.off()
svg(filename = paste0("Del7_GSEA_plot_padj_0.1_",comp,".svg"),
width = 10, height = 9)
print(ggplot(data = df.melt, mapping = aes(x = variable, y = ID)) +
facet_grid(TGROUP ~ comparison, scales = "free", space = "free") +
geom_tile(aes(fill=value), width=0.70, height=0.70, color="white", size = 0.30) + labs(fill = "NES") +
ggtitle(label = "GSEA hallmarks", subtitle = "p.adj < 0.1") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
strip.background = element_blank(),
strip.text = element_blank(),
plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5),
axis.ticks = element_blank(), axis.text = element_text(size = 7), axis.title = element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "white")) +
scale_fill_gradient2(na.value = '#ECECED', #limits=c(minval, maxval), breaks=seq(minval,maxval,by=1),
low='#005295',high='#770000', mid = 'white'))
dev.off()
saveRDS(df.melt,file = paste0(comp, "_df.melt.rds"))
}
TERM,TGROUP
MTORC1_SIGNALING,A
KRAS_SIGNALING_DN,A
KRAS_SIGNALING_UP,A
WNT_BETA_CATENIN_SIGNALING,A
NOTCH_SIGNALING,A
TGF_BETA_SIGNALING,A
PI3K_AKT_MTOR_SIGNALING,A
HEDGEHOG_SIGNALING,A
EPITHELIAL_MESENCHYMAL_TRANSITION,A
APICAL_JUNCTION,A
APICAL_SURFACE,A
ANGIOGENESIS,B
COAGULATION,B
COMPLEMENT,B
ALLOGRAFT_REJECTION,B
IL2_STAT5_SIGNALING,B
IL6_JAK_STAT3_SIGNALING,B
INFLAMMATORY_RESPONSE,B
INTERFERON_GAMMA_RESPONSE,B
INTERFERON_ALPHA_RESPONSE,B
TNFA_SIGNALING_VIA_NFKB,B
E2F_TARGETS,C
G2M_CHECKPOINT,C
MITOTIC_SPINDLE,C
MYC_TARGETS_V2,C
MYC_TARGETS_V1,C
APOPTOSIS,D
UV_RESPONSE_DN,D
UV_RESPONSE_UP,D
P53_PATHWAY,D
DNA_REPAIR,D
UNFOLDED_PROTEIN_RESPONSE,D
PROTEIN_SECRETION,D
PEROXISOME,D
GLYCOLYSIS,E
HEME_METABOLISM,E
HYPOXIA,E
REACTIVE_OXYGEN_SPECIES_PATHWAY,E
OXIDATIVE_PHOSPHORYLATION,E
ADIPOGENESIS,G
CHOLESTEROL_HOMEOSTASIS,G
ESTROGEN_RESPONSE_EARLY,G
ESTROGEN_RESPONSE_LATE,G
ANDROGEN_RESPONSE,G
FATTY_ACID_METABOLISM,G
BILE_ACID_METABOLISM,G
SPERMATOGENESIS,G
PANCREAS_BETA_CELLS,G
XENOBIOTIC_METABOLISM,G
MYOGENESIS,G
\ No newline at end of file
library(openxlsx)
library(ggplot2)
library(reshape2)
library(Seurat)
library(dplyr)
annotations <- read.xlsx(xlsxFile = "Figure_5E_5L_data.xlsx")
annotations <- annotations[,c(1:2)]
colnames(annotations) <- c("Cl","Category")
obj <- readRDS(file = "PDX_bulk.rds") # PDX bulk seurat object
obj$refined.hash.ID_noSample <- gsub(x = obj$refined.hash.ID, pattern = "^.+_", replacement = "", perl = T)
obj$PT_hashIDref <- paste0(obj$PtID_Condition, "_",obj$refined.hash.ID_noSample)
mydata_full.melt <- as.data.frame(round(prop.table(x = table(obj$RNA_snn_h.RNA_DonorID_res.0.6,obj$PT_hashIDref),margin = 2) * 100,3))
ctrl.index <- grep(mydata_full.melt$Var2, pattern = "Control")
treated.index <- grep(mydata_full.melt$Var2, pattern = "Treated")
mydata_treated.melt <- mydata_full.melt[treated.index,]
mydata_treated.melt$PT <- gsub(x = mydata_treated.melt$Var2, pattern = "_.+$", replacement = "", perl = T)
colnames(mydata_treated.melt) <- c("Cl","ID","Value","PT")
mydata_treated.melt$PTCl <- paste0(mydata_treated.melt$PT, "_", mydata_treated.melt$Cl)
mydata_ctrl.melt <- mydata_full.melt[ctrl.index,]
mydata_ctrl.melt$PT <- gsub(x = mydata_ctrl.melt$Var2, pattern = "_.+$", replacement = "", perl = T)
avg_pct_by_pt_cluster <- as.data.frame(mydata_ctrl.melt %>% group_by(PT, Var1) %>% summarise(avg_pct = mean(Freq)))
colnames(avg_pct_by_pt_cluster) <- c("PT","Cl","AvgCtrl")
avg_pct_by_pt_cluster$PTCl <- paste0(avg_pct_by_pt_cluster$PT, "_", avg_pct_by_pt_cluster$Cl)
avg_pct_by_pt_cluster$Cl <- NULL
avg_pct_by_pt_cluster$PT <- NULL
mydata_treated.melt_annot <- merge.data.frame(x = mydata_treated.melt,
y = avg_pct_by_pt_cluster,
by = "PTCl", sort = F, all.x = T)
mydata_treated.melt_annot$logFC <- log(mydata_treated.melt_annot$Value / mydata_treated.melt_annot$AvgCtrl)
mydata_treated.melt_annot <- mydata_treated.melt_annot[!mydata_treated.melt_annot$Cl %in% c(13,14,15),]
mydata_treated.melt_annot <- mydata_treated.melt_annot[complete.cases(mydata_treated.melt_annot),]
mydata_treated.melt_annot <- mydata_treated.melt_annot[is.finite(mydata_treated.melt_annot$logFC),]
mydata_treated.melt_annot <- droplevels(mydata_treated.melt_annot)
variable_order <- c("126High",
"Prog",
"MyProg",
"Mye",
"pDC",
"Ery",
"Cycle",
"CMP"
)
mydata_treated.melt_annot_b <- merge.data.frame(x = mydata_treated.melt_annot, y = annotations, by = "Cl", sort = F, all.x = T)
mydata_treated.melt_annot_b$Category <- factor(mydata_treated.melt_annot_b$Category, levels = variable_order)
colnames(mydata_treated.melt_annot_b) <- c("Cluster","PTCl","ID","Value","PT","AvgCtrl","logFC","Category")
png(filename = "Figure_5E.png", width = 9, height = 6, units = "in", res = 300)
ggplot(data = mydata_treated.melt_annot_b, mapping = aes(Cluster, logFC, color = Cluster)) +
geom_boxplot(outlier.color = "black", outlier.size = 0,outlier.alpha = 0) +
geom_point(position=position_dodge(width=0.75),aes(group=PT, shape = PT)) + ylim(-5,5) + geom_hline(yintercept = 0, alpha = 0.5) +
facet_grid(. ~ Category, scales = "free_x",space = "free_x") + ylab("logFC Treated vs Control") +
theme(strip.background = element_blank(), strip.text = element_text(face = "bold"),
legend.key = element_rect(fill = "white"),
panel.background = element_rect(fill = "white"), axis.line = element_line(colour = "black"))
dev.off()
svg(filename = "Figure_5E.svg", width = 9, height = 6)
ggplot(data = mydata_treated.melt_annot_b, mapping = aes(Cluster, logFC, color = Cluster)) +
geom_boxplot(outlier.color = "black", outlier.size = 0,outlier.alpha = 0) +
geom_point(position=position_dodge(width=0.75),aes(group=PT, shape = PT)) + ylim(-5,5) + geom_hline(yintercept = 0, alpha = 0.5) +
facet_grid(. ~ Category, scales = "free_x",space = "free_x") + ylab("logFC Treated vs Control") +
theme(strip.background = element_blank(), strip.text = element_text(face = "bold"),
legend.key = element_rect(fill = "white"),
panel.background = element_rect(fill = "white"), axis.line = element_line(colour = "black"))
dev.off()
mydata_full.melt_1.2 <- as.data.frame(round(prop.table(x = table(obj$RNA_snn_h.RNA_DonorID_res.1.2,obj$PT_hashIDref),margin = 2) * 100,3))
ctrl.index_1.2 <- grep(mydata_full.melt_1.2$Var2, pattern = "Control")
treated.index_1.2 <- grep(mydata_full.melt_1.2$Var2, pattern = "Treated")
mydata_treated.melt_1.2 <- mydata_full.melt_1.2[treated.index_1.2,]
mydata_treated.melt_1.2$PT <- gsub(x = mydata_treated.melt_1.2$Var2, pattern = "_.+$", replacement = "", perl = T)
colnames(mydata_treated.melt_1.2) <- c("Cl","ID","Value","PT")
mydata_treated.melt_1.2$PTCl <- paste0(mydata_treated.melt_1.2$PT, "_", mydata_treated.melt_1.2$Cl)
mydata_ctrl.melt.1.2 <- mydata_full.melt_1.2[ctrl.index_1.2,]
mydata_ctrl.melt.1.2$PT <- gsub(x = mydata_ctrl.melt.1.2$Var2, pattern = "_.+$", replacement = "", perl = T)
avg_pct_by_pt_cluster.1.2 <- as.data.frame(mydata_ctrl.melt.1.2 %>% group_by(PT, Var1) %>% summarise(avg_pct = mean(Freq)))
colnames(avg_pct_by_pt_cluster.1.2) <- c("PT","Cl","AvgCtrl")
avg_pct_by_pt_cluster.1.2$PTCl <- paste0(avg_pct_by_pt_cluster.1.2$PT, "_", avg_pct_by_pt_cluster.1.2$Cl)
avg_pct_by_pt_cluster.1.2$Cl <- NULL
avg_pct_by_pt_cluster.1.2$PT <- NULL
mydata_treated.melt_annot.1.2 <- merge.data.frame(x = mydata_treated.melt_1.2,
y = avg_pct_by_pt_cluster.1.2,
by = "PTCl", sort = F, all.x = T)
mydata_treated.melt_annot.1.2$logFC <- log(mydata_treated.melt_annot.1.2$Value / mydata_treated.melt_annot.1.2$AvgCtrl)
mydata_treated.melt_annot.1.2 <- mydata_treated.melt_annot.1.2[mydata_treated.melt_annot.1.2$Cl %in% c(5,6,19),]
mydata_treated.melt_annot.1.2 <- mydata_treated.melt_annot.1.2[complete.cases(mydata_treated.melt_annot.1.2),]
mydata_treated.melt_annot.1.2 <- mydata_treated.melt_annot.1.2[is.finite(mydata_treated.melt_annot.1.2$logFC),]
mydata_treated.melt_annot.1.2 <- droplevels(mydata_treated.melt_annot.1.2)
mydata_treated.melt_annot.1.2$Cl <- factor(mydata_treated.melt_annot.1.2$Cl, levels = c(5,6,19))
mydata_treated.melt_annot.1.2$Category <- "LSC subclusters"
png(filename = "Figure_5L.png", width = 9, height = 6, units = "in", res = 300)
ggplot(data = mydata_treated.melt_annot.1.2, mapping = aes(Cl, logFC, color = Cl)) +
geom_boxplot(position=position_dodge(width=0.75), outlier.color = "black", outlier.size = 0, outlier.alpha = 0) +
scale_color_manual(values = c("#f64343","#a60505","#680000")) + ylab("logFC Treated vs Control") + xlab("Cluster") +
geom_point(position=position_dodge(width=0.75),aes(group=PT, shape = PT)) + ylim(-6,6) + geom_hline(yintercept = 0, alpha = 0.5) +
facet_grid(. ~ Category, scales = "free_x",space = "free_x") + ylab("logFC Treated vs Control") +
theme(strip.background = element_blank(), strip.text = element_text(face = "bold"),
legend.key = element_rect(fill = "white"),
panel.background = element_rect(fill = "white"), axis.line = element_line(colour = "black"))
dev.off()
svg(filename = "Figure_5L.svg", width = 9, height = 6)
ggplot(data = mydata_treated.melt_annot.1.2, mapping = aes(Cl, logFC, color = Cl)) +
geom_boxplot(position=position_dodge(width=0.75), outlier.color = "black", outlier.size = 0, outlier.alpha = 0) +
scale_color_manual(values = c("#f64343","#a60505","#680000")) + ylab("logFC Treated vs Control") + xlab("Cluster") +
geom_point(position=position_dodge(width=0.75),aes(group=PT, shape = PT)) + ylim(-6,6) + geom_hline(yintercept = 0, alpha = 0.5) +
facet_grid(. ~ Category, scales = "free_x",space = "free_x") + ylab("logFC Treated vs Control") +
theme(strip.background = element_blank(), strip.text = element_text(face = "bold"),
legend.key = element_rect(fill = "white"),
panel.background = element_rect(fill = "white"), axis.line = element_line(colour = "black"))
dev.off()
library(Seurat)
library(dplyr)
library(openxlsx)
library(clusterProfiler)
library(reshape2)
library(ggplot2)
library(RColorBrewer)
library(gplots)
library(pheatmap)
markersList_singlePT <- readRDS("Figure_5H_5G_data.rds")
# SAVE EXCEL LISTS of filtered markers ordered for logfc
markersList_singlePT_filtered <- list()
for (y in names(markersList_singlePT)){
markersList_singlePT_filtered[[paste0(y)]] <- as.data.frame(markersList_singlePT[[paste0(y)]]) %>% arrange(desc(avg_logFC))
if (nrow(markersList_singlePT_filtered[[paste0(y)]]) == 0) {
next
}
}
gmset <- read.gmt(gmtfile = "Figure_5G_data.gmt")
gsea_results_pt <- list()
for(i in names(markersList_singlePT_filtered)){
geneList <- markersList_singlePT_filtered[[i]]$avg_logFC
names(geneList) <- as.character(row.names(markersList_singlePT_filtered[[i]]))
gsea_results_pt[[i]] <- GSEA(geneList, TERM2GENE=gmset, verbose=FALSE)
}
# full list with also not significant
gsea_results_pt_full <- list()
for(i in names(markersList_singlePT_filtered)){
geneList <- markersList_singlePT_filtered[[i]]$avg_logFC
names(geneList) <- as.character(row.names(markersList_singlePT_filtered[[i]]))
gsea_results_pt_full[[i]] <- GSEA(geneList, TERM2GENE=gmset, verbose=FALSE, pvalueCutoff = 1)
}
gsea_results_pt_full_df <- list()
for(i in names(markersList_singlePT_filtered)){
gsea_results_pt_full_df[[i]] <- gsea_results_pt_full[[i]]@result
}
# Step 1. Melt data in order to combine easily all the dataframes
gsea_df <- gsea_results_pt_full_df
gsea_df_melt <- list()
counter <- 1
names(gsea_df) <- gsub(pattern = "Cluster_0", replacement = "Prog_0", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_10", replacement = "Mye_10", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_11", replacement = "pDC_11", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_12", replacement = "CMP-like_12", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_1", replacement = "126High_1", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_2", replacement = "Cycle_2", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_3", replacement = "Mye_3", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_4", replacement = "Cycle_4", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_5", replacement = "Ery_5", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_6", replacement = "Ery_6", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_7", replacement = "MyProg_7", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_8", replacement = "Prog_8", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_9", replacement = "Cycle_9", x = names(gsea_df))
for (i in names(gsea_df)) {
#print(i)
comparison <- unlist(strsplit(x = i, split = "_"))[1]
cluster <- unlist(strsplit(x = i, split = "_"))[2]
keyval <- i
#print(paste0(keyval, ":", comparison, ",", cluster, ",", patient))
gsea_df[[i]]$comparison <- comparison
gsea_df[[i]]$cluster <- cluster
gsea_df[[i]]$keyval <- keyval
gsea_df_melt[[i]] <- melt(data = gsea_df[[i]], id.vars = c("ID", "comparison","keyval", "cluster", "p.adjust"),
measure.vars = c("NES"))
gsea_df_melt[[i]]$value <- round(gsea_df_melt[[i]]$value, digits = 2)
counter <- counter + 1
}
total <- bind_rows(gsea_df, .id = "column_label")
total_melt <- bind_rows(gsea_df_melt, .id = "column_label")
total_melt_filtered <- subset(x = total_melt, subset = p.adjust < 0.1)
myabs <- max(abs(x = total_melt$value))
total_melt_filtered_back <- total_melt_filtered
total_melt_filtered$ID <- as.factor(total_melt_filtered$ID)
total_melt_filtered$comparison <- as.factor(total_melt_filtered$comparison)
total_melt_filtered$cluster <- as.factor(total_melt_filtered$cluster)
uu <- melt(data = dcast(data = total_melt_filtered, formula = ID ~ keyval, value.var = 'value'), id.vars = 'ID')
for (i in 1:nrow(uu)) {
uu$comparison[i] <- unlist(strsplit(x = as.character(uu$variable)[i], split = "_"))[1]
uu$cluster[i] <- unlist(strsplit(x = as.character(uu$variable)[i], split = "_"))[2]
}
uu$ID <- as.factor(uu$ID)
uu$comparison <- as.factor(uu$comparison)
uu$cluster <- as.factor(uu$cluster)
dall <- uu
dall$cluster <- droplevels(dall$cluster)
# 1. drop levels of variable and comparison
dall$comparison <- droplevels(dall$comparison)
dall$variable <- droplevels(dall$variable)
# 2. reorder variable based on cluster factor order
dall$variable <- factor(x = dall$variable,
levels = unique(as.character(dall[ order(dall$cluster), ]$variable)))
dall_copy <- dall
######## Custom order of IDs and comparisons #######
dall$ID <- gsub(x = dall$ID, pattern = "HALLMARK_", replacement = "")
terms2group <- read.table(file = "Figure_5G_term2groups.txt", sep =",", header = T)
terms_order <- intersect(terms2group$TERM, dall$ID)
terms2group <- subset.data.frame(terms2group, subset = TERM %in% terms_order)
dall <- dall[!dall$ID %in% c("MYOGENESIS","SPERMATOGENESIS",
"BILE_ACID_METABOLISM",
"PROTEIN_SECRETION"),]
dall$ID <- factor(dall$ID, levels = terms_order)
# variable order according to paper
variable_order <- c("126High_1",
"Prog_0",
"Prog_8",
"MyProg_7",
"Mye_3",
"Mye_10",
"pDC_11",
"Ery_5",
"Ery_6",
"Cycle_2",
"Cycle_4",
"Cycle_9",
"CMP-like_12"
)
dall$variable <- factor(dall$variable, levels = variable_order)
#### grouping terms ####
dall <- merge.data.frame(x = dall, y = terms2group, by.x = "ID", by.y = "TERM", all.x = T)
dall$TGROUP <- factor(x = dall$TGROUP, levels = rev(c("A","B","C","D","E","F","G","H")))
b <- ggplot(data = dall, mapping = aes(x = variable, y = ID)) +
facet_grid(TGROUP ~ ., scales = "free", space = "free") +
geom_tile(aes(fill=value), width=0.60, height=0.70, color="white", size = 0.30) + labs(fill = "NES") +
ggtitle(label = "GSEA hallmarks", subtitle = "p.adj < 0.1") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1), strip.text.x = element_text(size = 0),
strip.text.y = element_blank(), strip.background = element_blank(),
plot.title = element_text(hjust = 0, size = 10), plot.subtitle = element_text(hjust = 0, size = 7),
axis.ticks = element_blank(), axis.text = element_text(size = 6), axis.title = element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "white")) +
scale_fill_gradient2(na.value = '#ECECED', limits=c(-3, 3), breaks=seq(-3,3,by=1),
low='#005295',high='#770000', mid = 'white')
svg(filename = "Figure_5G.svg", width = 9, height = 9)
b
dev.off()
png(filename = "Figure_5G.png", width = 6, height = 6, res = 600, units = "in")
b
dev.off()
This diff is collapsed.
TERM,TGROUP
HEDGEHOG_SIGNALING,A
NOTCH_SIGNALING,A
MTORC1_SIGNALING,A
KRAS_SIGNALING_DN,A
KRAS_SIGNALING_UP,A
WNT_BETA_CATENIN_SIGNALING,A
TGF_BETA_SIGNALING,A
PI3K_AKT_MTOR_SIGNALING,A
EPITHELIAL_MESENCHYMAL_TRANSITION,A
APICAL_JUNCTION,A
APICAL_SURFACE,A
COAGULATION,B
COMPLEMENT,B
ALLOGRAFT_REJECTION,B
IL2_STAT5_SIGNALING,B
IL6_JAK_STAT3_SIGNALING,B
INFLAMMATORY_RESPONSE,B
INTERFERON_GAMMA_RESPONSE,B
INTERFERON_ALPHA_RESPONSE,B
TNFA_SIGNALING_VIA_NFKB,B
E2F_TARGETS,C
G2M_CHECKPOINT,C
MITOTIC_SPINDLE,C
MYC_TARGETS_V2,C
MYC_TARGETS_V1,C
APOPTOSIS,D
UV_RESPONSE_DN,D
UV_RESPONSE_UP,D
P53_PATHWAY,D
DNA_REPAIR,D
UNFOLDED_PROTEIN_RESPONSE,D
PEROXISOME,D
GLYCOLYSIS,E
HEME_METABOLISM,E
HYPOXIA,E
REACTIVE_OXYGEN_SPECIES_PATHWAY,E
OXIDATIVE_PHOSPHORYLATION,E
ADIPOGENESIS,G
CHOLESTEROL_HOMEOSTASIS,G
ESTROGEN_RESPONSE_EARLY,G
ESTROGEN_RESPONSE_LATE,G
ANDROGEN_RESPONSE,G
FATTY_ACID_METABOLISM,G
XENOBIOTIC_METABOLISM,G
library(Seurat)
library(dplyr)
library(openxlsx)
library(clusterProfiler)
library(reshape2)
library(ggplot2)
library(RColorBrewer)
library(gplots)
library(pheatmap)
markersList_singlePT <- readRDS("Figure_5H_5G_data.rds")
# SAVE EXCEL LISTS of filtered markers ordered for logfc
markersList_singlePT_filtered <- list()
for (y in names(markersList_singlePT)){
markersList_singlePT_filtered[[paste0(y)]] <- as.data.frame(markersList_singlePT[[paste0(y)]]) %>% arrange(desc(avg_logFC))
if (nrow(markersList_singlePT_filtered[[paste0(y)]]) == 0) {
next
}
}
# RUN GSEA ANALYSIS
# Download GMT file
gmset <- read.gmt(gmtfile = "Figure_5H_data.gmt")
gsea_results_pt <- list()
for(i in names(markersList_singlePT_filtered)){
geneList <- markersList_singlePT_filtered[[i]]$avg_logFC
names(geneList) <- as.character(row.names(markersList_singlePT_filtered[[i]]))
gsea_results_pt[[i]] <- GSEA(geneList, TERM2GENE=gmset, verbose=FALSE)
}
gsea_results_pt_df <- list()
for(i in names(markersList_singlePT_filtered)){
gsea_results_pt_df[[i]] <- gsea_results_pt[[i]]@result
}
# full list with also not significant
gsea_results_pt_full <- list()
for(i in names(markersList_singlePT_filtered)){
geneList <- markersList_singlePT_filtered[[i]]$avg_logFC
names(geneList) <- as.character(row.names(markersList_singlePT_filtered[[i]]))
gsea_results_pt_full[[i]] <- GSEA(geneList, TERM2GENE=gmset, verbose=FALSE, pvalueCutoff = 1)
}
gsea_results_pt_full_df <- list()
for(i in names(markersList_singlePT_filtered)){
gsea_results_pt_full_df[[i]] <- gsea_results_pt_full[[i]]@result
}
# Step 1. Melt data in order to combine easily all the dataframes
gsea_df <- gsea_results_pt_full_df
gsea_df_melt <- list()
counter <- 1
names(gsea_df) <- gsub(pattern = "Cluster_0", replacement = "Prog_0", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_10", replacement = "Mye_10", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_11", replacement = "pDC_11", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_12", replacement = "CMP-like_12", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_1", replacement = "126High_1", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_2", replacement = "Cycle_2", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_3", replacement = "Mye_3", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_4", replacement = "Cycle_4", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_5", replacement = "Ery_5", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_6", replacement = "Ery_6", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_7", replacement = "MyProg_7", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_8", replacement = "Prog_8", x = names(gsea_df))
names(gsea_df) <- gsub(pattern = "Cluster_9", replacement = "Cycle_9", x = names(gsea_df))
for (i in names(gsea_df)) {
comparison <- unlist(strsplit(x = i, split = "_"))[1]
cluster <- unlist(strsplit(x = i, split = "_"))[2]
keyval <- i
gsea_df[[i]]$comparison <- comparison
gsea_df[[i]]$cluster <- cluster
gsea_df[[i]]$keyval <- keyval
gsea_df_melt[[i]] <- melt(data = gsea_df[[i]], id.vars = c("ID", "comparison","keyval", "cluster", "p.adjust"),
measure.vars = c("NES"))
gsea_df_melt[[i]]$value <- round(gsea_df_melt[[i]]$value, digits = 2)
counter <- counter + 1
}
total <- bind_rows(gsea_df, .id = "column_label")
total_melt <- bind_rows(gsea_df_melt, .id = "column_label")
total_melt_filtered <- subset(x = total_melt, subset = p.adjust < 0.1)
myabs <- max(abs(x = total_melt$value))
total_melt_filtered_back <- total_melt_filtered
total_melt_filtered$ID <- as.factor(total_melt_filtered$ID)
total_melt_filtered$comparison <- as.factor(total_melt_filtered$comparison)
total_melt_filtered$cluster <- as.factor(total_melt_filtered$cluster)
uu <- melt(data = dcast(data = total_melt_filtered, formula = ID ~ keyval, value.var = 'value'), id.vars = 'ID')
for (i in 1:nrow(uu)) {
uu$comparison[i] <- unlist(strsplit(x = as.character(uu$variable)[i], split = "_"))[1]
uu$cluster[i] <- unlist(strsplit(x = as.character(uu$variable)[i], split = "_"))[2]
}
uu$ID <- as.factor(uu$ID)
uu$comparison <- as.factor(uu$comparison)
uu$cluster <- as.factor(uu$cluster)
dall <- uu
dall$cluster <- droplevels(dall$cluster)
# 1. drop levels of variable and comparison
dall$comparison <- droplevels(dall$comparison)
dall$variable <- droplevels(dall$variable)
# 2. reorder variable based on cluster factor order
dall$variable <- factor(x = dall$variable,
levels = unique(as.character(dall[ order(dall$cluster), ]$variable)))
dall_copy <- dall
######## Custom order of IDs and comparisons #######
terms2group <- read.table(file = "Figure_5H_term2groups.txt", sep =",", header = T)
terms_order <- intersect(terms2group$TERM, dall$ID)
terms2group <- subset.data.frame(terms2group, subset = TERM %in% terms_order)
dall$ID <- factor(dall$ID, levels = terms_order)
# variable order according to paper
variable_order <- c("126High_1",
"Prog_0",
"Prog_8",
"MyProg_7",
"Mye_3",
"Mye_10",
"pDC_11",
"Ery_5",
"Ery_6",
"Cycle_2",
"Cycle_4",
"Cycle_9",
"CMP-like_12"
)
dall$variable <- factor(dall$variable, levels = variable_order)
#### grouping terms ####
dall <- merge.data.frame(x = dall, y = terms2group, by.x = "ID", by.y = "TERM", all.x = T)
dall$TGROUP <- factor(x = dall$TGROUP, levels = rev(c("A","B","C","D","E","F","G","H")))
b <- ggplot(data = dall, mapping = aes(x = variable, y = ID)) +
geom_tile(aes(fill=value), width=0.70, height=0.70, color="white", size = 0.20) + labs(fill = "NES") +
ggtitle(label = "GSEA senescence terms", subtitle = "p.adj < 0.1") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), strip.text.x = element_text(size = 0),
strip.text.y = element_blank(), strip.background = element_blank(),
plot.title = element_text(hjust = 0), plot.subtitle = element_text(hjust = 0),
axis.ticks = element_blank(), axis.text = element_text(size = 8), axis.title = element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "white")) +
scale_fill_gradient2(na.value = '#ECECED', limits=c(-2.7, 2.7), breaks=seq(-2,2,by=1),
low='#005295',high='#770000', mid = 'white')
svg(filename = "Figure_5H.svg", width = 9, height = 7)
b
dev.off()
png(filename = "Figure_5H.png", width = 9, height = 7, res = 600, units = "in")
b
dev.off()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment