Commit 1f735baa authored by Matteo Barcella's avatar Matteo Barcella
Browse files

scRNAseq realted scripts

parent 01d2c18d
This diff is collapsed.
TERM,GROUP
FRIDMAN_SENESCENCE_UP,A
REACTOME_CELLULAR_SENESCENCE,A
REACTOME_SENESCENCE_ASSOCIATED_SECRETORY_PHENOTYPE_SASP,A
CHICAS_RB1_TARGETS_SENESCENT,A
WP_SENESCENCE_AND_AUTOPHAGY_IN_CANCER,A
DEMAGALHAES_AGING_UP,A
DEMAGALHAES_AGING_DN,A
BIOCARTA_P53_PATHWAY,B
INGA_TP53_TARGETS,B
HALLMARK_INFLAMMATORY_RESPONSE,C
REACTOME_INTERFERON_SIGNALING,C
INTERFERON_SIGNALING (REACTOME_INTERFERON_SIGNALLING),C
REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM,C
WNT_SIGNALING,D
REACTOME_SIGNALING_BY_WNT,D
KEGG_WNT_SIGNALING_PATHWAY,D
BIOCARTA_WNT_PATHWAY,D
PID_WNT_CANONICAL_PATHWAY,D
PID_WNT_SIGNALING_PATHWAY,D
PID WNT SIGNALING PATHWAY,D
GO_CANONICAL_WNT_SIGNALING,D
GO_CANONICAL_WNT_SIGNALING_PATHWAY,D
GSE26351_UNSTIM_VS_WNT_PATHWAY_STIM_HEMATOPOIETIC_PROGENITORS_UP,D
WNT_CELL_GROWTH_AND_PROLIFERATION,D
WNT_SIGNALING_PCR_ARRAY,D
ST_WNT_BETA_CATENIN_PATHWAY,D
WNT_STANFORD,D
WNT_UP.V1_DN,D
HALLMARK_NOTCH_SIGNALING,E
KEGG_NOTCH_SIGNALING_PATHWAY,E
GO_NOTCH_SIGNALING_PATHWAY,E
PID_NOTCH_PATHWAY,E
GO_REGULATION_OF_NOTCH_SIGNALING_PATHWAY,E
NOTCH_TARGETS_PCR_ARRAY,E
VILIMAS_NOTCH1_TARGETS_UP,E
NOTCH_DN.V1_UP,E
NOTCH_DN.V1_DN,E
GO_NEGATIVE_REGULATION_OF_NOTCH_SIGNALING_PATHWAY,E
NGUYEN_NOTCH1_TARGETS_DN,E
KEGG_ECM_RECEPTOR_INTERACTION,F
KEGG_CELL_ADHESION_MOLECULES_CAMS,F
CELL_ADHESION_MOLECULES,F
TSAI_RESPONSE_TO_RADIATION_THERAPY,G
CHIARETTI_T_ALL_REFRACTORY_TO_THERAPY,G
KAN_RESPONSE_TO_ARSENIC_TRIOXIDE,G
KIM_RESPONSE_TO_TSA_AND_DECITABINE_UP,G
CHIBA_RESPONSE_TO_TSA_UP,G
RHEIN_ALL_GLUCOCORTICOID_THERAPY_UP,G
BOROVIAK_DIAPAUSE_UP,H
BOROVIAK_DIAPAUSE_DN,H
REACTOME_REGULATION_OF_MITOTIC_CELL_CYCLE,H
\ No newline at end of file
# Volcano plot T vs C cluster 1 focus
library(Seurat)
library(ggplot2)
library(ggrepel)
library(RColorBrewer)
library(dplyr)
library(scales)
TvsC_markers <- readRDS(file = "Figure_5K_data.rds")
markers <- TvsC_markers$Cluster_1
markers$p_val_adj[which(markers$p_val_adj == 0)] <- 1e-300
adj.pval.thr=0.05
logfc.thr = 0.15
# Volcano plot
data <- data.frame(gene = row.names(markers), pval = -log10(markers$p_val_adj), lfc = markers$avg_logFC)
data <- na.omit(data)
data <- mutate(data, color = case_when(data$lfc > logfc.thr & data$pval > -log10(adj.pval.thr) ~ "Overexpressed",
data$lfc < -logfc.thr & data$pval > -log10(adj.pval.thr) ~ "Underexpressed",
abs(data$lfc) < logfc.thr | data$pval < -log10(adj.pval.thr) ~ "NonSignificant"))
data <- data[order(data$pval, decreasing = TRUE),]
data <- data[order(data$lfc, decreasing = TRUE),]
highl <- head(subset(data, color != "NonSignificant"), 12)
vol <- ggplot(data, aes(x = lfc, y = pval, colour = color)) +
theme_bw(base_size = 12) +
theme(legend.position = "right") +
ylim(c(0,350)) +
xlab("log2 Fold Change") +
ylab(expression(-log[10]("adjusted p-value"))) +
geom_hline(yintercept = -log10(adj.pval.thr), colour = "darkgray")
tn = function() trans_new('cuberoot', transform = function(x) x^(1/3),
inverse = function(x) x^3)
vol <- vol +
geom_point(size = 2, alpha = 0.8, na.rm = T) +
scale_color_manual(name = "Expression",
values = c(Overexpressed = "#ae0000",
Underexpressed = "#095786",
NonSignificant = "darkgray")) +
geom_text_repel(data = highl, mapping = aes(label = gene), size = 5, segment.alpha = 0.4,force = 50,show.legend = FALSE, max.overlaps = 30) +
theme(panel.grid.major = element_blank(),
axis.text = element_text(size = 12),
panel.grid.minor = element_blank())
plot(vol)
for (i in 1:20) {
png(filename = paste0("Plot_n_",i, ".png"), width = 9, height = 6, units = "in", res = 200)
print(plot(vol))
dev.off()
}
png("Figure_5K.png", units = "in", res = 700, height = 4.5, width = 9)
plot(vol)
dev.off()
library(Seurat)
library(ggplot2)
library(dittoSeq)
library(dplyr)
library(RColorBrewer)
library(stats)
library(openxlsx)
library(psych)
PDX <- readRDS("PDX_object.rds")
# Analysis of PDX LSC subsets in T vs C
table(PDX$RNA_snn_h.RNA_DonorID_res.1.2, PDX$RNA_Condition)
PDX <- SetIdent(PDX, value = "RNA_snn_h.RNA_DonorID_res.1.2")
cluster6cells = WhichCells(object = PDX, idents = "6")
cluster5cells = WhichCells(object = PDX, idents = "5")
cluster19cells = WhichCells(object = PDX, idents = "19")
PDX <- SetIdent(PDX, value = "RNA_Condition")
T_cells = WhichCells(object = PDX, idents = "Treated")
C_cells = WhichCells(object = PDX, idents = "Control")
cluster6_T = intersect(cluster6cells, T_cells)
cluster6_C = intersect(cluster6cells, C_cells)
cluster5_C = intersect(cluster5cells, C_cells)
Cluster6_TvsC_markers = FindMarkers(object = PDX,
ident.1 = cluster6_T,
ident.2 = cluster6_C)
Cluster6C_vs_5C_markers = FindMarkers(object = PDX,
ident.1 = cluster6_C,
ident.2 = cluster5_C)
PDX = SetIdent(PDX, value = "RNA_snn_h.RNA_DonorID_res.1.2")
cluster5_vs6and19_markers = FindMarkers(object = PDX,
ident.1 = cluster5cells,
ident.2 = c(cluster6cells, cluster19cells))
cluster6_vs5and19_markers = FindMarkers(object = PDX,
ident.1 = cluster6cells,
ident.2 = c(cluster5cells, cluster19cells))
cluster19_vs5and6_markers = FindMarkers(object = PDX,
ident.1 = cluster19cells,
ident.2 = c(cluster5cells, cluster6cells))
# These xlsx are stored in Figure_5M_data.zip
openxlsx::write.xlsx(x = cluster5_vs6and19_markers, file = "res12_cluster5_vs6and19_markers.xlsx", row.names = T)
openxlsx::write.xlsx(x = cluster6_vs5and19_markers, file = "res12_cluster6_vs5and19_markers.xlsx", row.names = T)
openxlsx::write.xlsx(x = cluster19_vs5and6_markers, file = "res12_cluster19_vs5and6_markers.xlsx", row.names = T)
top10markers_eachcluster = c(rownames(cluster5_vs6and19_markers %>% slice_max(order_by = avg_logFC, n = 10)),
rownames(cluster6_vs5and19_markers %>% slice_max(order_by = avg_logFC, n = 10)),
rownames(cluster19_vs5and6_markers %>% slice_max(order_by = avg_logFC, n = 10)))
DotPlot(PDX, features = top15markers_eachcluster, idents = c("5","6","19"))
plotinfo = dittoSeq::dittoDotPlot(object = PDX, vars = top15markers_eachcluster,
group.by = "RNA_snn_h.RNA_DonorID_res.1.2",
cells.use = WhichCells(PDX, idents = c("5","6","19")),
ylab = "LSC subclusters", y.reorder = c(2,3,1),
main = "Top 15 markers by logFC per cluster", data.out = T)
svg(filename = "Figure_5M_left.png", width = 9.9, height = 3)
ggplot(data = plotinfo$data, aes(x = var, y = grouping)) +
geom_point(aes(color = color, size = size)) +
scale_color_gradient2(high = "#b31b2c", mid = "white", low = "#2268ad") +
theme_bw() + theme(panel.grid = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = rel(1.25)),
axis.text.y = element_text(size = rel(1.5))) +
xlab("Top 15 cluster markers by logFC") + ylab("LSC subclusters") +
labs(color = paste("Relative","expression",sep="\n"), size = paste("Percent","expression",sep="\n"))
dev.off()
plotinfo_reduced = dittoSeq::dittoDotPlot(object = PDX, vars = c("CDK6","INKA1","NECTIN2","PAK4","SIRT1"),
group.by = "RNA_snn_h.RNA_DonorID_res.1.2",
cells.use = WhichCells(PDX, idents = c("5","6","19")),
ylab = "LSC subclusters", y.reorder = c(2,3,1),
main = "Focus CDK6 INKA1", data.out = T)
svg(filename = "Figure_5M_right.svg", width = 3.5, height = 3)
ggplot(data = plotinfo_reduced$data, aes(x = var, y = grouping)) +
geom_point(aes(color = color, size = size)) +
scale_color_gradient2(high = "#b31b2c", mid = "white", low = "#2268ad") +
theme_bw() + theme(panel.grid = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = rel(1.25)),
axis.text.y = element_text(size = rel(1.5))) +
xlab("HSC latency markers") + ylab("LSC subclusters") +
labs(color = paste("Relative","expression",sep="\n"), size = paste("Percent","expression",sep="\n"))
dev.off()
library(Seurat)
library(ggplot2)
library(dplyr)
library(Hmisc)
# Import AML dataset
AML <- readRDS(file = "AML_object.rds")
AMLcells <- rownames(AML@meta.data)
# Import the new updated module scores
MS <- readRDS(file = "Figure_6A_data.rds")
# ##########################
# FeaturePlots of 126High and 126Low signature:
# ##########################
AML <- AddMetaData(AML, metadata = MS %>% filter(PatientID %nin% c("PT19","PT20")) %>% select(-PatientID) %>% select(-Timepoint))
FPdata <- FetchData(AML, vars = c("X126up","X126down", "UMAP_1", "UMAP_2", "Timepoint_corr", "PatientID"))
png(filename = "Figure_6A_top.png", width = 3, height = 3.3, units = "in", res = 1200)
ggplot(FPdata %>% filter(Timepoint_corr == "DX") %>% arrange(X126up))+
geom_point(aes(x = UMAP_1, y = UMAP_2, color = X126up), size =0.01) +
scale_color_viridis_c(option = "inferno", limits = c(-0.1,0.06)) +
guides(color = guide_colorbar(reverse = F)) + theme_void() +
labs(title = "126High signature") +
theme(legend.position = "top", legend.title = element_blank(), plot.title = element_text(hjust = 0.5),
legend.key.width=unit(0.33,"in"), legend.key.height = unit(0.2, "in"))
dev.off()
# ##########################
# ViolinPlots at diagnosis:
# ##########################
AML = SetIdent(AML, value = "Timepoint_corr")
AML$PatientID = factor(AML$PatientID, levels = c("PT01", "PT02", "PT13",
"PT09","PT10","PT08","PT15",
"PT06","PT07","PT12"))
svg(filename = "Figure_6B_left.svg",
width = 5, height = 3.3)
VlnPlot(AML, idents = "DX", features = "X126up",
group.by = "PatientID" , pt.size = 0,
cols = c("#c00000","#c00000","#c00000",
"#ed7d31","#ed7d31","#ed7d31","#ed7d31",
"#70ad47","#70ad47","#70ad47")) +
geom_hline(yintercept=0, linetype="dashed", color = "black", alpha = 0.5) +
theme_bw(base_size = 6) + ggtitle("126High Signature") +
theme(legend.text=element_text(size=rel(1.5)), legend.position = "none") +
theme(plot.title = element_text(size = rel(1.5), hjust = 0.5),
axis.title.x = element_blank(), axis.text = element_text(size = rel(1.5)),
panel.grid.major.x = element_blank(), panel.grid.minor.y = element_blank())
dev.off()
round(prop.table(table(AML$X126up>0,AML$PatientID,AML$Timepoint_corr)[,,"DX"], margin = 2)*100, digits = 2)
# PT01 PT02 PT13 PT09 PT10 PT08 PT15 PT06 PT07 PT12
# FALSE 56.10 59.67 67.44 78.45 77.86 93.29 94.38 95.84 99.86 99.22
# TRUE 43.90 40.33 32.56 21.55 22.14 6.71 5.62 4.16 0.14 0.78
# ##########################
# Vlnplots dx and rel comparison
# ##########################
# ALL diagnosis vs Relapses single patients
DiagnosisVLN <- MS %>% filter(Timepoint == "DX")
DiagnosisVLN$Facet <- "Diagnosis"
DiagnosisVLNsubset <- DiagnosisVLN %>% group_by(PatientID) %>% sample_n(1400) %>% ungroup()
RelapseVLN <- MS %>% filter(Timepoint == "REL")
RelapseVLN$Facet <- "Relapse"
RelapseVLNsubset <- RelapseVLN %>% group_by(PatientID) %>% sample_n(1200) %>% ungroup()
PT08VLN <- MS %>% filter(PatientID == "PT08" & Timepoint %in% c("DX", "REL", "REL_NR"))
PT08VLN$Facet <- "PT08"
PT15VLN <- MS %>% filter(PatientID == "PT15")
PT15VLN$Facet <- "PT15"
PT19VLN <- MS %>% filter(PatientID == "PT19" & Timepoint == "REL")
PT19VLN$Facet <- "PT19"
PT20VLN <- MS %>% filter(PatientID == "PT20" & Timepoint == "REL1")
PT20VLN$Facet <- "PT20"
PT20VLN$Timepoint <- "REL"
VLN <- rbind(RelapseVLNsubset, DiagnosisVLNsubset, PT08VLN, PT15VLN, PT19VLN, PT20VLN)
VLN$Facet <- factor(VLN$Facet, levels = c("Diagnosis", "Relapse", "PT08", "PT15", "PT19", "PT20"))
# Diagnosis WITHOUT REFR AML
VLNnoREFR <- VLN
VLNnoREFR <- VLNnoREFR %>% filter(!PatientID %in% c("PT01", "PT02", "PT13"))
VLNnoREFR_noRELpool <- VLNnoREFR
VLNnoREFR_noRELpool <- VLNnoREFR_noRELpool %>% filter(!Facet == "Relapse")
VLNnoREFR_noRELpool$Facet <- ifelse(VLNnoREFR_noRELpool$Facet == "Diagnosis", "non Ref. \n AML", paste0(VLNnoREFR_noRELpool$Facet))
svg(filename = "Figure_6G.svg", height = 3.3,
width = 4.4)
ggplot(VLNnoREFR_noRELpool) +
geom_violin(aes(x = Timepoint, y = `126up`, fill = Timepoint)) +
facet_grid(~Facet, scales = "free_x", space = "free_x") +
geom_hline(yintercept=0, linetype="dashed", color = "black", alpha = 0.5) +
scale_fill_manual(values = c(CR = "#70ad47", fut_REL = "#ed7d31", REFR = "#c00000",
DX = "#cfcfcf", REL = "#458fe8", REL_NR = "#19385c")) +
theme_bw(base_size = 6) + ggtitle("126High Signature") +
theme(legend.text=element_text(size=rel(1.5)), legend.position = "none") +
theme(plot.title = element_text(size = rel(2), hjust = 0.5), axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text = element_text(size = rel(1.5)), strip.text = element_text(size = rel(2)),
panel.grid.minor = element_blank(), panel.grid.major.x = element_blank(), strip.background = element_blank())
dev.off()
# ##########################
# Vlnplot LSC subclusters
# ##########################
AML_0_11_14_final <- readRDS("AML_subclustering.rds")
AML_0_11_14_final_cells <- rownames(AML_0_11_14_final@meta.data)
MS_cl_0_11_14 <- MS[AML_0_11_14_final_cells,]
MS_cl_0_11_14 <- MS_cl_0_11_14 %>% select(-Timepoint) %>% select(-PatientID)
AML_0_11_14_final <- AddMetaData(AML_0_11_14_final, metadata = MS_cl_0_11_14)
AML_0_11_14_final$Timepoint_corr <- factor(AML_0_11_14_final$Timepoint_corr, levels = c("DX","D14","D30","REL","REL_NR"))
# ##########################
# DotPlots
# ##########################
plotinfo_latency = dittoSeq::dittoDotPlot(object = AML_0_11_14_final, vars = c("X126up","CDK6","INKA1"),
group.by = "RNA_snn_h.PatientID_res.0.6",
ylab = "LSC subclusters",
data.out = T)
svg(filename = "Figure_6E_HSC_latency.png", width = 2.5, height = 4)
ggplot(data = plotinfo_latency$data, aes(x = var, y = grouping)) +
geom_point(aes(color = color, size = size)) +
scale_color_gradient2(high = "#b31b2c", mid = "white", low = "#2268ad") +
theme_bw() + theme(panel.grid = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = rel(1.25)),
axis.text.y = element_text(size = rel(1.5))) + scale_x_discrete(labels= c("126High","CDK6", "INKA1")) +
xlab("HSC latency") + ylab("LSC subclusters") +
labs(color = paste("Relative","expression",sep="\n"), size = paste("Percent","expression",sep="\n"))
dev.off()
# ##########################
# CellCycle Barplots
# ##########################
PT20_active <- readRDS("PT20_active.rds")
PT20cells <- rownames(PT20_active@meta.data)
PT19_active <- readRDS("PT19_active.rds")
PT19cells <- rownames(PT19_active@meta.data)
MS_pt19 <- MS[PT19cells,]
MS_pt19 <- MS_pt19 %>% select(-PatientID) %>% select(-Timepoint)
MS_pt20 <- MS[PT20cells,]
MS_pt20 <- MS_pt20 %>% select(-PatientID) %>% select(-Timepoint)
PT19_active <- AddMetaData(object = PT19_active, metadata = MS_pt19)
PT20_active <- AddMetaData(object = PT20_active, metadata = MS_pt20)
PT19data <- FetchData(object = PT19_active, vars = c("RNA_PatientID","RNA_Timepoint", "Phase", "X126up"))
PT19data$AML_up_full_cat <- ifelse(PT19data$X126up > 0, "POS", "NEG")
PT20data <- FetchData(object = PT20_active, vars = c("RNA_PatientID","RNA_Timepoint", "Phase", "X126up"))
PT20data$AML_up_full_cat <- ifelse(PT20data$X126up > 0, "POS", "NEG")
PT20data$RNA_Timepoint <- ifelse(PT20data$RNA_Timepoint == "REL1", "REL", paste0(PT20data$RNA_Timepoint))
AMLdata <- FetchData(object = AML, vars = c("PatientID","Timepoint_corr", "Phase", "X126up"))
AMLdata$AML_up_full_cat <- ifelse(AMLdata$X126up > 0, "POS", "NEG")
colnames(AMLdata) <- colnames(PT19data)
Data <- rbind(AMLdata, PT19data, PT20data)
Data_rel <- filter(Data, RNA_Timepoint == "REL")
Data_rel_summ <- Data_rel %>% group_by(RNA_PatientID, AML_up_full_cat, Phase) %>% summarise(n = n())
png(filename = "Figure_6H_bottom_left.png", units = "in", height = 3, width = 3, res = 300)
ggplot(Data_rel_summ) +
geom_col(aes(x= AML_up_full_cat, y = n, fill = Phase), position="fill", alpha = 0.8) +
scale_fill_manual(values = c("lightgrey","orange","red")) +
scale_y_continuous(labels = function(x) paste0(x*100, "%")) +
facet_wrap(~RNA_PatientID, nrow = 1) + theme_bw() +
ggtitle("Relapse AML") + xlab("126High Signature") +
theme(panel.grid.minor = element_blank(), panel.grid.major.x = element_blank(),
strip.background = element_blank(), axis.title.y = element_blank(),
plot.title = element_text(hjust = 0.5), legend.position = "bottom", axis.text.x = element_text())
dev.off()
Data_dx <- filter(Data, RNA_Timepoint == "DX")
Data_dx_summ <- Data_dx %>% group_by(RNA_PatientID, AML_up_full_cat, Phase) %>% summarise(n = n())
png(filename = "Figure_6H_top.png", units = "in", height = 3, width = 3.3*2.5, res = 300)
ggplot(Data_dx_summ) +
geom_col(aes(x= AML_up_full_cat, y = n, fill = Phase), position="fill") +
scale_fill_manual(values = c("lightgrey","orange","red")) +
scale_y_continuous(labels = function(x) paste0(x*100, "%")) +
facet_wrap(~RNA_PatientID, nrow = 1) + theme_bw() +
ggtitle("Diagnosis AML") + xlab("126High Signature") +
theme(panel.grid.minor = element_blank(), panel.grid.major.x = element_blank(),
strip.background = element_blank(), axis.title.y = element_blank(),
plot.title = element_text(hjust = 0.5), legend.position = "bottom")
dev.off()
Data_relnr <- filter(Data, RNA_Timepoint == "REL_NR")
Data_relnr_summ <- Data_relnr %>% group_by(RNA_PatientID, AML_up_full_cat, Phase) %>% summarise(n = n())
png(filename = "Figure_6H_bottom_right.png", units = "in", height = 3, width = 2.75, res = 300)
ggplot(Data_relnr_summ) +
geom_col(aes(x= AML_up_full_cat, y = n, fill = Phase), position="fill") +
scale_fill_manual(values = c("lightgrey","orange","red")) +
scale_y_continuous(labels = function(x) paste0(x*100, "%")) +
facet_wrap(~RNA_PatientID, nrow = 1) + theme_bw() +
ggtitle("Relapse NR AML") + xlab("126High Signature") +
theme(panel.grid.minor = element_blank(), panel.grid.major.x = element_blank(),
strip.background = element_blank(), axis.title.y = element_blank(),
plot.title = element_text(hjust = 0.5), legend.position = "bottom")
dev.off()
library(Seurat)
library(ggplot2)
library(ComplexHeatmap)
library(dplyr)
library(dittoSeq)
AML <- readRDS("AML_object.rds")
markers_res06 <- AML@misc$clustermarkers$res0.6
AML$RNA_snn_res.0.6 <- factor(AML$RNA_snn_res.0.6, levels = c("0","11","14",
"3","1",
"9","7","13",
"2","10",
"5","6","4","8","12"))
markers_res06_up <- list()
for (x in levels(AML$RNA_snn_res.0.6)) {
markers_res06_up[[paste0("cl",x)]] <- markers_res06[[paste0("cl",x)]][["up"]]
}
fun <- function(x) {
x %>% slice_max(order_by = avg_logFC, n=20)
}
markers_res06_top20 <- lapply(markers_res06_up, FUN = fun)
genes_to_plot <- c()
tmp <- c()
for (x in levels(AML$RNA_snn_res.0.6)) {
tmp <- markers_res06_top20[[paste0("cl",x)]]$gene
genes_to_plot <- c(genes_to_plot, tmp)
}
genes_to_plot_unique <- unique(genes_to_plot)
genenames_to_plot <- c("CD52","FAM30A","IGHM","CD99","TNFRSF4","GUCY1A1","EGFL7","ADGRE5","GADD45A","IGFBP2","MDM2",
"BST2","IFIT3","HSPA1A","HSPA1B","RAB31","HLA-A","CD34","LTB","HLA-DRB1","HOPX","IRF1","RPL3",
"MSI2","RPL5","RPS2","RPS18","RPS3","MZB1","RPL10A","SELENOP","RPS5","EIF3E","MPO","ELANE","AZU1",
"CXCL8","SOX4","PRSS57","CAT","AREG","HBD","FCER1A","MS4A3","TPSAB1","GATA2","HBB","HBD","HBA1","HBA2","CA1",
"AHSP","BLVRB","KLF1","PRTN3","IL2RA","IGLL1","MZB1","MPO","PCLAF","TYMS","PCNA","TUBA1B","DUT","TUBB","HMGB2",
"TK1","HELLS","MCM7","MCM4","HMGB1","CDT1","CENPF","TOP2A","ASPM","MKI67","TUBB4B","CTSG","LYZ","CXCR4","HLA-DQA1",
"HLA-DQB1","LGALS2","HLA-DRA","HLA-DPB1","HLA-DPA1","HLA-DRB1","CD74","FCER1A","S100A9","S100A8","LYZ","S100A12",
"CD14","NCF1","GRN","S100A10","FCER1G","S100A6","S100A11","FCGR3A","MS4A7","FCER1G","S100A11","CD48",
"GNLY","NKG7","GZMB","GZMA","CD7")
genenames_to_plot <- unique(genenames_to_plot)
labelindexes <- match(genenames_to_plot,genes_to_plot_unique)
AML <- SetIdent(AML, value = "Timepoint_corr")
cellstoplot<- WhichCells(object = AML,idents = c("DX","D14","D30","REL","REL_NR"))
AML$TimepointID <- AML$Timepoint_corr
AML$TimepointID <- factor(AML$TimepointID, levels = c("DX","D14","D30","REL","REL_NR"))
PandY <- PurpleAndYellow(k = 50)
Set1 <- RColorBrewer::brewer.pal(9, "Set1")
Set2 <- RColorBrewer::brewer.pal(8, "Set2")
Set3 <- RColorBrewer::brewer.pal(12, "Set3")
values = c(Set1, Set2, Set3)[1:length(unique(AML$RNA_snn_res.0.6))]
valuesdonor = c(RColorBrewer::brewer.pal(12, "Paired"))[1:length(unique(AML$PatientID))]
valuestimepoint = c("#e5e5e5","#b2b2b2","#7f7f7f","#4c4c4c","#000000")
genenames_to_plot_reduced <- c("CD52","FAM30A","IGHM","CD99","TNFRSF4","GUCY1A1","EGFL7","ADGRE5","IGFBP2","MDM2",
"IFIT3","HSPA1A","HSPA1B","RAB31","HLA-A","CD34","LTB","HLA-DRB1","HOPX","IRF1","RPL3",
"MSI2","RPL5","RPS2","RPS18","MZB1","EIF3E","MPO","ELANE","AZU1",
"CXCL8","SOX4","PRSS57","CAT","AREG","HBD","FCER1A","GATA2","HBB","HBD","CA1",
"AHSP","BLVRB","KLF1","IL2RA","IGLL1","MZB1","MPO","PCNA","TUBA1B","TUBB","HMGB2",
"HELLS","MCM7","MCM4","HMGB1","CENPF","TOP2A","MKI67","TUBB4B","CTSG","LYZ","CXCR4","HLA-DQA1",
"HLA-DQB1","HLA-DRA","HLA-DPB1","CD74","FCER1A","S100A9","S100A8","LYZ",
"CD14","NCF1","GRN","FCER1G","S100A6","FCGR3A","MS4A7","FCER1G","CD48",
"GNLY","NKG7","GZMB","CD7")
genenames_to_plot_reduced <- unique(genenames_to_plot_reduced)
labelindexes_reduced <- match(genenames_to_plot_reduced,genes_to_plot_unique)
png(filename = "Figure_S1F.png", width = 16, height = 12, res = 700, units = "in")
dittoHeatmap(object = AML, genes = genes_to_plot_unique, complex = TRUE,
order.by = c("RNA_snn_res.0.6"), annot.by = c("RNA_snn_res.0.6","PatientID","TimepointID"),heatmap.colors = PandY,
use_raster = TRUE, cluster_cols = F, cluster_rows = F, scale = "none", slot = "scale.data", fontsize_row = 12,
breaks = seq(-2, 2, length.out = 51), show_rownames = F, cells.use = cellstoplot,
annot.colors = c(values, valuesdonor, valuestimepoint)) +
rowAnnotation(link = anno_mark(at = labelindexes_reduced,
labels = genenames_to_plot_reduced,
labels_gp = gpar(fontsize = 8), padding = unit(1, "mm")))
dev.off()
png(filename = "Figure_1B.png", width = 4, height = 4.4, res = 700, units = "in")
DimPlot(AML, group.by = "RNA_snn_res.0.6", label = T,
pt.size = 0.1, shuffle = T, cols = values, label.size = 5) + theme_void() +
ggtitle("NPM1mut AML blasts") + theme(plot.title = element_text(hjust = 0.5), legend.position = "none")
dev.off()
library(Seurat)
library(ggplot2)
library(ComplexHeatmap)
library(dplyr)
library(dittoSeq)
AML <- readRDS("../activeobj/Del7_active.rds")
markers_res06 <- AML@misc$markers_h$RNA_snn_h.orig.ident_res.0.6
markers_res06 <- subset.data.frame(x = markers_res06, subset = avg_logFC > 0)
markers_res06_l <- split(markers_res06, f = markers_res06$cluster)
names(markers_res06_l) <- paste0("cl",names(markers_res06_l))
pdf("Vlnplot_signature_AML_full_ordered.pdf")
VlnPlot(object = AML, features = "AML_up_full", sort = T, pt.size = 0, group.by = "RNA_snn_h.orig.ident_res.0.6")
dev.off()
AML$RNA_snn_res.0.6 <- factor(AML$RNA_snn_h.orig.ident_res.0.6, levels = c("10","1","0","5","9","3","4","6","2","8","7" ,"11"))
total_levels <- c("10","1","0","5","9","3","4","6","2","8","7" ,"11")
markers_res06_up <- list()
for (x in levels(AML$RNA_snn_res.0.6)) {
markers_res06_up[[paste0("cl",x)]] <- markers_res06_l[[paste0("cl",x)]]
}
for(nel in c(15,20,30,40)){
fun <- function(x) {
x %>% slice_max(order_by = avg_logFC, n = nel)
}
markers_res06_top <- lapply(markers_res06_up, FUN = fun)
genes_to_plot <- c()
tmp <- c()
for (x in total_levels) {
tmp <- markers_res06_top[[paste0("cl",x)]]$gene
genes_to_plot <- c(genes_to_plot, tmp)
}
genes_to_plot_unique <- unique(genes_to_plot)
genenames_to_plot <- c("NPDC1","HOPX","CD52","FAM30A","IGHM","PRDX1","CD99","NRIP1","SPINK2",
"SNHG12","HEMGN","CD34","IGHM","CD164","ANGPT1","S100A9","S100A8","LYZ",
"CD14","AREG","NEGR1","AGR2","CXCR4","FCER1A","CXCL8","GATA2","HBD","ID3",
"ID1","HES1","ID2","HBD","TUBA1B","TUBB","MKI67","TOP2A","CENPF","PCNA",
"MT-ND1","MT-ND2","MT-ATP6","MT-CYB","MPO","AZU1","LYZ","HLA-DPB1","HLA-DRB1",
"MS4A6A","HLA-DQA1","HLA-DRA","HLA-DQB1","HLA-DPA1","LTB","SPINK2",
"JUN","GNLY","GZMB","CD7","KLRD1","GZMK")
genenames_to_plot <- unique(genenames_to_plot)
labelindexes <- match(genenames_to_plot,genes_to_plot_unique)
AML <- SetIdent(AML, value = "RNA_Timepoint")
cellstoplot <- WhichCells(object = AML,idents = c("DX","D14","D30"))
AML$TimepointID <- AML$RNA_Timepoint
AML$TimepointID <- factor(AML$TimepointID, levels = c("DX","D14","D30"))
PandY <- PurpleAndYellow(k = 100)
Set1 <- RColorBrewer::brewer.pal(9, "Set1")
Set2 <- RColorBrewer::brewer.pal(8, "Set2")
Set3 <- RColorBrewer::brewer.pal(12, "Set3")
values = c(Set1, Set2, Set3)[1:length(total_levels)]
valuesdonor = c(RColorBrewer::brewer.pal(12, "Paired"))[1:length(unique(AML$RNA_PatientID))]
valuestimepoint = c("#e5e5e5","#b2b2b2","#7f7f7f")
AML$Timepoint <- AML$TimepointID
AML$Patient <- AML$RNA_PatientID
AML$Cluster <- AML$RNA_snn_res.0.6
png(filename = paste0("Top",nel,"markers_cl_res06_limitPandY.png"), width = 16, height = 12, res = 700, units = "in")
a <- dittoHeatmap(object = AML, genes = genes_to_plot_unique, complex = TRUE, border_color = NA, name = " ",
order.by = c("Cluster"), annot.by = c("Cluster","Patient","Timepoint"),heatmap.colors = PandY,
use_raster = TRUE, cluster_cols = F, cluster_rows = F, scale = "none", slot = "scale.data", fontsize_row = 12,
breaks = seq(-2, 2, length.out = 101), show_rownames = F, cells.use = cellstoplot,
annot.colors = c(values, valuesdonor, valuestimepoint)) +
rowAnnotation(link = anno_mark(at = labelindexes,
labels = genenames_to_plot,
labels_gp = gpar(fontsize = 7), padding = unit(1, "mm")))
draw(a, padding = unit(c(5, 5, 5, 5), "mm"))
dev.off()
}
library(DESeq2)
library(openxlsx)
library(ggplot2)
library(reshape2)
library(clusterProfiler)
library(pheatmap)
library(RColorBrewer)
library(dplyr)
dge_obj_full <- readRDS("20210804_202201_DGE.rds")
counts_normalized <- DESeq2::counts(object = dge_obj_full$deseq$ds, normalized = T)
# Gene expression heatmap of genes
# open rds of clustering - fetch terms and retrieve genes
clustering_data <- readRDS(file = "Figure_S4D_data.rds")
# genes2plot
genes2plot <- c(clustering_data$genelist$`GO:0006911_phagocytosis, engulfment`,
clustering_data$genelist$`GO:0099024_plasma membrane invagination`,
clustering_data$genelist$`GO:0006958_complement activation, classical pathway`,
clustering_data$genelist$`GO:0006910_phagocytosis, recognition`,
clustering_data$genelist$`GO:0008037_cell recognition`,
clustering_data$genelist$`GO:0006959_humoral immune response`,
clustering_data$genelist$`GO:0042742_defense response to bacterium`,
clustering_data$genelist$`GO:0050853_B cell receptor signaling pathway`,
clustering_data$genelist$`GO:0042113_B cell activation`,
clustering_data$genelist$`GO:0002250_adaptive immune response`,
clustering_data$genelist$`GO:0002449_lymphocyte mediated immunity`,
clustering_data$genelist$`GO:0051249_regulation of lymphocyte activation`,
clustering_data$genelist$`GO:0032944_regulation of mononuclear cell proliferation`,
clustering_data$genelist$`GO:0070661_leukocyte proliferation`,
clustering_data$genelist$`GO:0046651_lymphocyte proliferation`,
clustering_data$genelist$`GO:0030098_lymphocyte differentiation`,
clustering_data$genelist$`GO:0045580_regulation of T cell differentiation`,
clustering_data$genelist$`GO:0050870_positive regulation of T cell activation`,
clustering_data$genelist$`GO:0007159_leukocyte cell-cell adhesion`,
clustering_data$genelist$`GO:1903039_positive regulation of leukocyte cell-cell adhesion`,
clustering_data$genelist$`GO:0002228_natural killer cell mediated immunity`,
clustering_data$genelist$`GO:0002715_regulation of natural killer cell mediated immunity`,
clustering_data$genelist$`GO:0001910_regulation of leukocyte mediated cytotoxicity`,
clustering_data$genelist$`GO:0001909_leukocyte mediated cytotoxicity`,
clustering_data$genelist$`GO:0031341_regulation of cell killing`)
genes2plot <- base::unique(genes2plot)
counts_rlog_corr_genes2plot <- dge_obj_full$deseq$rlog_corr[genes2plot,]
counts_rlog_corr_genes2plot_df <- as.data.frame(t(counts_rlog_corr_genes2plot))
counts_rlog_corr_genes2plot_df <- merge.data.frame(x =counts_rlog_corr_genes2plot_df, y = dge_obj_full$sample_info)
counts_rlog_corr_genes2plot_df_melt <- melt(data = counts_rlog_corr_genes2plot_df, id.vars = c("patient","condition"))
counts_rlog_corr_genes2plot_df_light <- counts_rlog_corr_genes2plot_df_melt %>%
group_by(variable,condition,patient) %>%
summarize(AvgExpression_ptcondition = mean(value))
counts_rlog_corr_genes2plot_df_light$combo <- paste(counts_rlog_corr_genes2plot_df_light$patient, counts_rlog_corr_genes2plot_df_light$condition, sep = "_")
counts_rlog_corr_genes2plot_df_light_casted <- dcast(data = counts_rlog_corr_genes2plot_df_light, formula = combo ~ variable, value.var = "AvgExpression_ptcondition")
Annotations_colors_4_gexp_heatmap <- readRDS("Figure_4CDE_S4DE_colors.rds")
Annotations_colors_4_gexp_heatmap_nomir = Annotations_colors_4_gexp_heatmap
Annotations_colors_4_gexp_heatmap_nomir$`miR-126 activity` = NULL
SampleInfo_annotation_heatmap_gexp <- readRDS("Figure_4CDE_S4DE_annots.rds")
SampleInfo_annotation_heatmap_gexp_nomir = SampleInfo_annotation_heatmap_gexp
SampleInfo_annotation_heatmap_gexp_nomir$`miR-126 activity` = NULL
SampleInfo_annotation_heatmap_gexp_nomir$condition = NULL
SampleInfo_annotation_heatmap_gexp_nomir$Patient = SampleInfo_annotation_heatmap_gexp_nomir$patient
SampleInfo_annotation_heatmap_gexp_nomir$patient = NULL
colorsZ2bis <- c(seq(-4,4,by=0.1))
my_palette <- c("#021830",colorRampPalette(rev(brewer.pal(n=11,name="RdBu")))(n = length(colorsZ2bis)-3),"#33000f")
png("Figure_S4D.png", height = 12, width = 12, res = 700, units = "in")
pheatmap(mat = counts_rlog_corr_genes2plot, cellwidth = 8, cellheight = 4,fontsize_col = 5,
clustering_distance_cols = "manhattan", annotation_col = SampleInfo_annotation_heatmap_gexp_nomir,
annotation_colors = Annotations_colors_4_gexp_heatmap_nomir,
border_color = FALSE,
color = my_palette, breaks = colorsZ2bis, scale = "row", fontsize_row = 4)
dev.off()
# BulkRNAseq data analysis with PathfindR:
library(pathfindR)
library(pathfindR.data)
library(dplyr)
library(DESeq2)
# Load expression matrix:
rawdata <- read.table(file = "Full-gene-counts.txt") # GEO data
rawdata_m <- as.matrix(rawdata)
samples <- colnames(rawdata)
# Sample info
GFPhighsamples <- grep(".+H$", samples, value = T)
GFPlowsamples <- grep(".+L$", samples, value = T)
Treatedsamples <- c(grep(pattern = ".+TH$", samples, value = T), grep(pattern = ".+TL$", samples, value = T))
Controlsamples <- c(grep(pattern = ".+CH$", samples, value = T), grep(pattern = ".+CL$", samples, value = T))
HighOnly_TvsC <- read_excel("Figure_S4E_data.xlsx")
HighOnly_TvsC$Down_regulated <- NA
HighOnly_TvsC$Down_regulated <- as.character(HighOnly_TvsC$Down_regulated)
# load DGE analysis
DGE = readRDS("20210804_202201_DGE.rds")
normcounts = counts(DGE[["deseq"]][["ds"]], normalized=T)
score_matrix_HighOnly_TvsC <- score_terms(enrichment_table = HighOnly_TvsC,
exp_mat = normcounts[,GFPhighsamples],
cases = intersect(GFPhighsamples, Treatedsamples),
use_description = TRUE, # default FALSE
label_samples = FALSE, # default = TRUE
case_title = "Treated", # default = "Case"
control_title = "Control", # default = "Control"
low = "#f7797d", # default = "green"
mid = "#fffde4", # default = "black"
high = "#1f4037") # default = "red"
# annotation vectors/colors
Annotations_colors_4_gexp_heatmap <- readRDS("Figure_4CDE_S4DE_colors.rds")
Annotations_colors_4_gexp_heatmap_nomir = Annotations_colors_4_gexp_heatmap
Annotations_colors_4_gexp_heatmap_nomir$`miR-126 activity` = NULL
SampleInfo_annotation_heatmap_gexp <- readRDS("Figure_4CDE_S4DE_annots.rds")
SampleInfo_annotation_heatmap_gexp_nomir = SampleInfo_annotation_heatmap_gexp
SampleInfo_annotation_heatmap_gexp_nomir$`miR-126 activity` = NULL
SampleInfo_annotation_heatmap_gexp_nomir$condition = NULL
SampleInfo_annotation_heatmap_gexp_nomir$Patient = SampleInfo_annotation_heatmap_gexp_nomir$patient
SampleInfo_annotation_heatmap_gexp_nomir$patient = NULL
colorsZ2bis <- c(seq(-2,2,by=0.1))
my_palette <- c("#021830",colorRampPalette(rev(brewer.pal(n=11,name="RdBu")))(n = length(colorsZ2bis)-3),"#33000f")
png(filename = "Figure_S4E.png", width = 16, height = 9, res = 700, units = "in")
pheatmap::pheatmap(score_matrix_HighOnly_TvsC, scale = "none",
border_color = NA, fontsize_row = 8,
color = my_palette, breaks = colorsZ2bis,
show_rownames = T, annotation_names_col = T,
annotation_col = SampleInfo_annotation_heatmap_gexp_nomir,
annotation_colors = Annotations_colors_4_gexp_heatmap_nomir, legend = T,
cellwidth = 9, cellheight = 7,
cluster_rows = T, cutree_cols = 2, cutree_rows = 2)
dev.off()
callback = function(hc, score_matrix_HighOnly_TvsC){
sv = svd(t(score_matrix_HighOnly_TvsC))$v[,1]
dend = reorder(as.dendrogram(hc), wts = sv)
as.hclust(dend)
}
png(filename = "Figure_S4E_callback.png", width = 16, height = 9, res = 700, units = "in")
pheatmap::pheatmap(score_matrix_HighOnly_TvsC, scale = "none",
border_color = NA, fontsize_row = 8,
color = my_palette, breaks = colorsZ2bis,
show_rownames = T, annotation_names_col = T,
annotation_col = SampleInfo_annotation_heatmap_gexp_nomir,
annotation_colors = Annotations_colors_4_gexp_heatmap_nomir, legend = T,
cellwidth = 9, cellheight = 7,
cluster_rows = T, cutree_cols = 1, cutree_rows = 2, clustering_callback = callback)
dev.off()
library(Seurat)
library(SingleR)
library(Seurat)
library(SingleR)
library(scuttle)
# read all raw matrices and create a merge object
# on this dataset add metadata and remove all cells that do not have the annotation field
# Perform log normalization
# Convert to singlecellexperiment
# Perform singleR annotation by using custom dataset from VanGalen
# Downaload all raw data from VanGalen (GSE116256)
sampleids <- gsub(x = grep(x = grep(x = unlist(strsplit(x = list.files(path = "GSE116256_RAW/"), split = "_")),
pattern = "AML.*dem\\.txt$", value = T, perl = T),
pattern = "nanopore", value = T, invert = T), pattern = ".dem.txt", replacement = "")
# populated annotation and counts
all.objects <- list()
for (sid in sampleids) {
all.objects[[sid]][["annot"]] <- read.delim(file = list.files(path = "GSE116256_RAW/", pattern = paste0(sid,".anno"), full.names = T))
all.objects[[sid]][["matrix"]] <- read.delim(file = list.files(path = "GSE116256_RAW/", pattern = paste0(sid,".dem"), full.names = T), header = T, row.names = 1)
}
dir.create("rds_from_raw/")
for(samples in sampleids){
colnames(all.objects[[samples]][["matrix"]]) <- gsub(pattern = "\\.", replacement = "-", x = colnames(all.objects[[samples]][["matrix"]]))
all.objects[[samples]][["annot"]]$Cell <- as.character(all.objects[[samples]][["annot"]]$Cell)
rownames(all.objects[[samples]][["annot"]]) <- all.objects[[samples]][["annot"]]$Cell
all.objects[[samples]][["rds"]] <- CreateSeuratObject(counts = all.objects[[samples]][["matrix"]], project = paste0("VG_",samples),
min.cells = 0, min.features = 0, meta.data = all.objects[[samples]][["annot"]] )
all.objects[[samples]][["rds"]][["percent.mt"]] <- PercentageFeatureSet(all.objects[[samples]][["rds"]], pattern = "^MT-")
saveRDS(object = all.objects[[samples]][["rds"]], file = paste0("rds_from_raw/", samples,".rds"))
}
rds_objects <- list()
for(samples in sampleids[-1]){
rds_objects[[samples]] <- all.objects[[samples]][["rds"]]
}
obj <- merge(x = all.objects[[sampleids[1]]][["rds"]], y = rds_objects, project = "FullVGalen")
############# Creating singleR reference dataset ################
# Convert seurat object to singlecellexperiment
obj.sce <- as.SingleCellExperiment(obj)
obj.sce <- logNormCounts(obj.sce)
obj <- readRDS(file = "PDX_full.rds") # PDF full seurat object
obj@misc[["VGalen_CellType"]] <- SingleR(test = obj@assays$RNA@data,
ref = obj.sce,
labels = obj.sce$CellType, de.method="wilcox")
obj@meta.data[["VGalen_CellType"]] <- obj@misc[["VGalen_CellType"]]$pruned.labels
obj@misc[["VGalen_PredictionRefined"]] <- SingleR(test = obj@assays$RNA@data,
ref = obj.sce,
labels = obj.sce$PredictionRefined, de.method="wilcox")
obj@meta.data[["VGalen_PredictionRefined"]] <- obj@misc[["VGalen_PredictionRefined"]]$pruned.labels
obj.sce$CellPred_CellType <- paste0(obj.sce$PredictionRefined,"_",obj.sce$CellType)
obj@misc[["CellPred_CellType"]] <- SingleR(test = obj@assays$RNA@data,
ref = obj.sce,
labels = obj.sce$CellPred_CellType, de.method="wilcox")
obj@meta.data[["CellPred_CellType"]] <- obj@misc[["CellPred_CellType"]]$pruned.labels
Figure5A_B_C_D_data <- FetchData(object = obj, vars(c("CellID","RNA_DonorID","UMAP_1","UMAP_2","UMAPh_1","UMAPh_2",
"SingleRrefined_BlueprintEncodeData_labels","VGalen_CellType","VGalen_PredictionRefined",
"RNA_snn_h.RNA_DonorID_res.1.2")))
Figure5A_B_C_D_data$CellID <- rownames(Figure5A_B_C_D_data)
colnames(Figure5A_B_C_D_data)[1] <- "Patient"
colnames(Figure5A_B_C_D_data)[6] <- "BPE_refined"
colnames(Figure5A_B_C_D_data)[9] <- "Cluster_res_1.2"
write.xlsx(x = Figure5A_B_C_D_data, file = "Figure_S5_A_B_C_D_data.xlsx")
library(Seurat)
library(ggplot2)
library(ComplexHeatmap)
library(dplyr)
library(dittoSeq)
library(patchwork)
library(RColorBrewer)
# Load LSC subclustering object
obj <- readRDS("AML_LSC_subclustering.rds")
# Load markers from LSC subclustering object at resolution 0.6
markers <- readRDS("Figure_S6E_data.rds")
markers_split <- list()
for (x in unique(markers$cluster)) {
markers_split[[paste0("Cluster_",x)]] <- markers %>% filter(cluster == x)
write.table(markers_split[[paste0("Cluster_",x)]], file = paste0("Cluster_",x,"_markers.txt"),
sep = '\t', quote = FALSE)
}
fun <- function(x) {
x %>% slice_max(order_by = avg_logFC, n=10)
}
markers_split_up_top10 <- lapply(markers_split, FUN = fun)
genes_to_plot <- c()
tmp <- c()
for (x in names(markers_split_up_top10)) {
tmp <- markers_split_up_top10[[x]]$gene
genes_to_plot <- c(genes_to_plot, tmp)
}
genes_to_plot_unique <- unique(genes_to_plot)
PandY <- PurpleAndYellow(k = 50)
Set1 <- RColorBrewer::brewer.pal(9, "Set1")
Set2 <- RColorBrewer::brewer.pal(8, "Set2")
Set3 <- RColorBrewer::brewer.pal(12, "Set3")
values = c(Set1, Set2, Set3)[1:length(unique(obj$RNA_snn_h.PatientID_res.0.6))]
valuesdonor = c(RColorBrewer::brewer.pal(12, "Paired"))[1:length(unique(obj$PatientID))]
valuestimepoint = c("#e5e5e5","#b2b2b2","#7f7f7f","#4c4c4c","#000000")
obj$TimepointID <- obj$Timepoint_corr
png(filename = "Figure_S6E.png", width = 8, height = 8, res = 700, units = "in")
dittoHeatmap(object = obj, genes = genes_to_plot_unique, complex = TRUE,
order.by = "RNA_snn_h.PatientID_res.0.6", annot.by = c("RNA_snn_h.PatientID_res.0.6","PatientID", "TimepointID"), heatmap.colors = PandY,
use_raster = TRUE, cluster_cols = F, cluster_rows = F, scale = "none", slot = "scale.data", fontsize_row = 12,
breaks = seq(-2, 2, length.out = 51), show_rownames = T,
annot.colors = c(values, valuesdonor, valuestimepoint))
dev.off()
NMF <- function(inbam, sampleid, statsfile, Chromium_10X_CB_indexes, tags.of.interest = c("CB","UB","GN","RE")){
# load libraries
require(Rsamtools)
require(plyr)
require(ggplot2)
# define interval for fetching reads
which <- IRangesList("5"=IRanges(171410538L, 171410543L))
# Extract interesting tags
param <- ScanBamParam(which=which,tag=tags.of.interest, what=scanBamWhat())
filteredbam <- scanBam(inbam, param=param)
# Defining NPM1 mutated patterns
NPM1_patterns <- c("^CTCTGTCTGGCAG","^TCTGTCTGGCAG", "^CTGTCTGGCAG",
"^TGTCTGGCAG", "^GTCTGGCAG", "TCTCTGTCTGGC",
"GATCTCTGTCTGG$","GATCTCTGTCTG$", "GATCTCTGTCT$", "GATCTCTGTC$")
WT_pattern <- "TCTCTGGCAG"
# Reads matching NPM1 patterns
matches_NPM1_patterns_index <- grep(x = filteredbam$`5:171410538-171410543`$seq, pattern = paste(NPM1_patterns,collapse="|"), perl = T)
#cat(paste("Sampleid","Variable", "Value", sep=","), file=paste0(sampleid,"_stats.txt"), append=FALSE, sep = "\n")
cat(paste(sampleid,"N_Reads_matching_NPM1_patterns", length(matches_NPM1_patterns_index), sep=","), file=statsfile, append=TRUE, sep = "\n")
# Reads not matchingNPM1 pattern
matches_NO_NPM1_patterns_index <- grep(x = filteredbam$`5:171410538-171410543`$seq,
pattern = paste(NPM1_patterns,collapse="|"), perl = T, invert = T)
cat(paste(sampleid, "N_Reads_NOT_matching_NPM1_patterns", length(matches_NO_NPM1_patterns_index), sep=","), file=statsfile, append=TRUE, sep = "\n")
# Create temporal data.frame to retrieve WT patterns on original dataset
WT_tmp_df <- cbind(as.data.frame(filteredbam$`5:171410538-171410543`$seq[matches_NO_NPM1_patterns_index]), matches_NO_NPM1_patterns_index)
base::colnames(WT_tmp_df) <- c("seq","index_no_NPM1_pattern_main_bam")
matches_WT_patterns_index <- WT_tmp_df[grep(x = WT_tmp_df$seq, pattern = "TCTCTGGCAG"),]$index_no_NPM1_pattern_main_bam
cat(paste(sampleid,"N_Reads_NOT_matching_NPM1_patterns_but_matching_WT_pattern", length(matches_WT_patterns_index), sep=","), file=statsfile, append=TRUE, sep = "\n")
# subsetting values according to index from matches
mydata <- list()
for(i in names(filteredbam$`5:171410538-171410543`$tag)){
if(length(matches_NPM1_patterns_index) > 0){
mydata[["MUT"]][[i]] <- filteredbam$`5:171410538-171410543`$tag[[i]][matches_NPM1_patterns_index]
}
if(length(matches_WT_patterns_index) > 0){
mydata[["WT"]][[i]] <- filteredbam$`5:171410538-171410543`$tag[[i]][matches_WT_patterns_index]
}
}
# for both mut and wt:
# 1. convert list of tags to data.frame
# 2. add sampled id
# 3. filtering out reads that have NULL CB
# 4. filtering out reads with BC not in the sample 4-indexes seed (non possibile con certi bams)
# 5. Discard UMI duplicates
# 6. Counting N° UMI (UB) belonging to each barcode
df <- list()
counting <- list()
flag <- NULL
for(k in c("MUT", "WT")){
if(k == "MUT" & length(matches_NPM1_patterns_index) == 0){
flag <- "MUT"
cat(paste0(sampleid,",","N_Reads_supporting_", k, ",", 0),
file=statsfile, append=TRUE, sep = "\n")
cat(paste0(sampleid,",","N_Reads_supporting_", k, "_CB_BC_filt", ",", 0),
file=statsfile, append=TRUE, sep = "\n")
cat(paste0(sampleid,",","N_UMI_supporting_", k, ",", 0),
file=statsfile, append=TRUE, sep = "\n")
cat(paste0(sampleid,",","N_CB_supporting_", k, ",", 0),
file=statsfile, append=TRUE, sep = "\n")
next}
if(k == "WT" & length(matches_WT_patterns_index) == 0){
flag <- "WT"
cat(paste0(sampleid,",","N_Reads_supporting_", k, ",", 0),
file=statsfile, append=TRUE, sep = "\n")
cat(paste0(sampleid,",","N_Reads_supporting_", k, "_CB_BC_filt", ",", 0),
file=statsfile, append=TRUE, sep = "\n")
cat(paste0(sampleid,",","N_UMI_supporting_", k, ",", 0),
file=statsfile, append=TRUE, sep = "\n")
cat(paste0(sampleid,",","N_CB_supporting_", k, ",", 0),
file=statsfile, append=TRUE, sep = "\n")
next
}
if(length(matches_NPM1_patterns_index) > 0 & length(matches_WT_patterns_index) > 0){
flag <- "WT_MUT"
}
# 1. convert list of tags to data.frame
df[[k]] <- do.call(cbind, lapply(mydata[[k]], as.data.frame))
base::colnames(df[[k]]) <- tags.of.interest
# 2. add sampled id
df[[k]]$SM <- sampleid
cat(paste0(sampleid,",","N_Reads_supporting_", k, ",", base::nrow(df[[k]])),
file=statsfile, append=TRUE, sep = "\n")
# 3. filtering out reads that have NULL CB
# 4. filtering out reads with BC not in the sample 4-indexes seed
# df[[k]] <- base::subset.data.frame(x = df[[k]], subset = !is.na(CB) & BC %in% Chromium_10X_CB_indexes)
df[[k]] <- base::subset.data.frame(x = df[[k]], subset = !is.na(CB))
cat(paste0(sampleid,",","N_Reads_supporting_", k, "_CB_BC_filt", ",", base::nrow(df[[k]])),
file=statsfile, append=TRUE, sep = "\n")
# 5. Discard UMI duplicates
df[[k]] <- base::unique(df[[k]][,c("CB","UB")])
cat(paste0(sampleid,",","N_UMI_supporting_", k, ",", base::nrow(df[[k]])),
file=statsfile, append=TRUE, sep = "\n")
# 6. Counting N° UMI (UB) belonging to each barcode
counting[[k]] <- plyr::count(df = df[[k]], "CB")
cat(paste0(sampleid,",","N_CB_supporting_", k, ",", base::nrow(counting[[k]])),
file=statsfile, append=TRUE, sep = "\n")
if(k == "MUT"){
base::colnames(counting[[k]]) <- c("CB","MUT")
}
if(k == "WT"){
base::colnames(counting[[k]]) <- c("CB","WT")
}
}
print(paste0("Sample: ",sampleid ,"My flag: ", flag))
if(flag == "MUT"){
total <- counting[["WT"]]
total$MUT <- 0
}
if(flag == "WT"){
total <- counting[["MUT"]]
total$WT <- 0
}
if(!flag %in% c("WT","MUT")){
total <- merge.data.frame(x = counting[["MUT"]], y = counting[["WT"]], by = 'CB', all = T)
}
cat(paste0(sampleid,",","N_CB_supporting_both_MUT_and_WT", ",", base::nrow(total)),
file=statsfile, append=TRUE, sep = "\n")
# converting NA to 0
total[is.na(total)] <- 0
# recoding Cellbarcodes in order to fit with Seurat metadata
total$CB <- gsub(x = gsub(x = total$CB, pattern = "^", replacement = paste0(sampleid,"_"), perl = T), pattern = "-1$", replacement = "", perl = T)
return(total)
}
# Performing analysis on all samples
# loading function:
source(file = "NMF.R")
# write header stats.txt file
cat(paste("Sampleid","Variable", "Value", sep=","), file="NMF_full_stats.txt", append=FALSE, sep = "\n")
# read samplesheet file to match sampleid ---> 10X indexes
samplesheet <- read.csv(file = "SampleSheet_PT19.csv", stringsAsFactors = F)
# read 10x indexes
indexes_10X <- read.csv(file = "Chromium_10X_indexes.txt", header = F, stringsAsFactors = F)
# initialize list of results
npm1_metadata <- list()
for(sid in samplesheet$Sample){
current_SI <- subset.data.frame(x = samplesheet, subset = Sample == sid, select = 'Index')$Index
current_cb_seed <- as.character(subset.data.frame(x = indexes_10X, subset = V1 == current_SI, select = c("V2","V3","V4","V5")))
npm1_metadata[[sid]] <- NMF(inbam = paste0("/beegfs/scratch/ric.gentner/ric.gentner/scRNA_AML/M84toM97/results/PT19/00-NPM1_classification/input/",sid,"_possorted_genome_bam.bam"), sampleid = sid, Chromium_10X_CB_indexes = current_cb_seed, statsfile = "NMF_full_stats.txt")
}
# Merging all data frames in a single one:
NPM1_complete_data <- do.call(rbind, lapply(npm1_metadata, as.data.frame))
base::rownames(NPM1_complete_data) <- NPM1_complete_data$CB
NPM1_complete_data$Classification <- ifelse(NPM1_complete_data$MUT > 0, "MUT", ifelse(NPM1_complete_data$WT > 5, "WT","ND"))
write.table(x = NPM1_complete_data, file = "NPM1_PT19_data.txt", quote = F, row.names = F)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment