Commit f8a64d30 authored by Matteo Barcella's avatar Matteo Barcella
Browse files

script for scGATE annotation of Fig2H dataset using Fig2C dataset

parent 8716d431
File added
# Automatic Annotation of Fig2H dataset using scGATE leveraging Fig2C dataset as reference dataset
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(UCell)
library(scGate)
library(viridis)
library(stringr)
library(patchwork)
library(dittoSeq)
library(gridExtra)
library(Seurat)
library(openxlsx)
Fig2H_dataset <- readRDS("Fig2H_dataset.rds") # from pipeline
DefaultAssay(Fig2H_dataset) <- "alra"
length(Features(Fig2H_dataset)) # 14273
length(VariableFeatures(Fig2H_dataset)) # 2141
plotdir <- "plots/"
dir.create(path = plotdir)
#### SIGNATURES ####
# markers from Fig2C dataset
markers_lists <- list(level1 = NULL,
level2 = NULL)
dat01_markers_1 <- readRDS("Full_GSEA_markers_Population.rds")
dat01_markers_2 <- readRDS("Full_GSEA_markers_Classification.rds")
markers_lists[["level1"]] <- dat01_markers_1
markers_lists[["level2"]] <- dat01_markers_2
markers_genes_pos <- list(level1 = list(), level2 = list())
markers_genes_neg <- list(level1 = list(), level2 = list())
thresh.pct <- 0.2
ntop <- 15
for(lev in names(markers_lists)){
markers_lists[[lev]]$pctDelta <- markers_lists[[lev]]$pct.1 - markers_lists[[lev]]$pct.2
for (cl in unique(markers_lists[[lev]]$cluster)) {
tmpdata <- subset.data.frame(markers_lists[[lev]], cluster == cl)
# pickup top pos genes according to logFC and threshold
tmp.pos <- subset(tmpdata, subset = pctDelta > 0.2)
print(head(tmp.pos))
markers_genes_pos[[lev]][[cl]] <- head(tmp.pos[order(tmp.pos$avg_log2FC, decreasing = T),],ntop)$gene
# pickup top neg genes according to logFC and threshold
tmp.neg <- subset(tmpdata, subset = pctDelta < - 0.2)
markers_genes_neg[[lev]][[cl]] <- head(tmp.neg[order(tmp.neg$avg_log2FC, decreasing = F),],ntop)$gene
}
}
markers_listdir <- "markersdir_scGate_input/"
dir.create(markers_listdir)
saveRDS(markers_genes_neg, file = paste0(markers_listdir,"markers_genes_neg.rds"))
saveRDS(markers_genes_pos, file = paste0(markers_listdir,"markers_genes_pos.rds"))
# Inspecting % of expressing cells related to each set of genes ####
markers_genes <- list(pos = markers_genes_pos, neg = markers_genes_neg)
pct_genes <- markers_genes
for(direction in names(markers_genes)){
for(annlev in names(markers_genes[[direction]])){
for(cl in names(markers_genes[[direction]][[annlev]])){
pct_genes[[direction]][[annlev]][[cl]] <- NULL
if(length(markers_genes[[direction]][[annlev]][[cl]]) > 0){
for(cur.gene in markers_genes[[direction]][[annlev]][[cl]]){
test <- FetchData(object = Fig2H_dataset, vars = c("Patient","Timepoint",markers_genes[[direction]][[annlev]][[cl]]))
pct_genes[[direction]][[annlev]][[cl]][cur.gene] <- prop.table(table(test[[cur.gene]] > 0))["TRUE"]
}
}
else{
next
}
}
}
}
saveRDS(object = markers_genes, "Complete_list_of_markers_x_scGATE.rds")
saveRDS(object = pct_genes, "Complete_list_of_pctmarkers.rds")
#### Gating model ####
gate_models <- markers_genes
gate_models <- NULL
gate_models_onlypos <- NULL
for(direction in names(markers_genes)){
for(annlev in names(markers_genes[[direction]])){
for(cl in names(markers_genes[[direction]][[annlev]])){
if(length(markers_genes[[direction]][[annlev]][[cl]]) > 0){
gate_models[[annlev]][[cl]] <- scGate::gating_model(name=paste0(direction, "_", annlev, "_", cl), model = gate_models[[annlev]][[cl]] ,
signature = markers_genes[[direction]][[annlev]][[cl]],
level = gsub(x = annlev, pattern = "level", replacement = ""),
positive = ifelse(test = direction == "pos", TRUE, FALSE))
if(direction == "pos"){
gate_models_onlypos[[annlev]][[cl]] <- scGate::gating_model(name=paste0(direction, "_", annlev, "_", cl), model = gate_models_onlypos[[annlev]][[cl]] ,
signature = markers_genes[[direction]][[annlev]][[cl]],
level = gsub(x = annlev, pattern = "level", replacement = ""),
positive = ifelse(test = direction == "pos", TRUE, FALSE))
}else{
next
}
}
else{
next
}
}
}
}
Fig2H_dataset.backup <- Fig2H_dataset
Fig2H_dataset <- scGate(data = Fig2H_dataset, model = gate_models$level1)
colnames(Fig2H_dataset@meta.data) <- gsub(x = colnames(Fig2H_dataset@meta.data), pattern = "is.pure", replacement = "is.pure.level1")
colnames(Fig2H_dataset@meta.data) <- gsub(x = colnames(Fig2H_dataset@meta.data), pattern = "scGate_multi", replacement = "scGate_multi.level1")
colnames(Fig2H_dataset@meta.data) <- gsub(x = colnames(Fig2H_dataset@meta.data), pattern = "CellOntology_ID", replacement = "CellOntology_ID.level1")
colnames(Fig2H_dataset@meta.data) <- gsub(x = colnames(Fig2H_dataset@meta.data), pattern = "CellOntology_name", replacement = "CellOntology_name.level1")
Fig2H_dataset <- scGate(data = Fig2H_dataset, model = gate_models$level2)
colnames(Fig2H_dataset@meta.data) <- gsub(x = colnames(Fig2H_dataset@meta.data), pattern = "^is.pure_",perl = T, replacement = "is.pure.level2_")
colnames(Fig2H_dataset@meta.data) <- gsub(x = colnames(Fig2H_dataset@meta.data), pattern = "^scGate_multi$", perl = T,replacement = "scGate_multi.level2")
colnames(Fig2H_dataset@meta.data) <- gsub(x = colnames(Fig2H_dataset@meta.data), pattern = "^CellOntology_ID$", perl = T,replacement = "CellOntology_ID.level2")
colnames(Fig2H_dataset@meta.data) <- gsub(x = colnames(Fig2H_dataset@meta.data), pattern = "^CellOntology_name$", perl = T,replacement = "CellOntology_name.level2")
Fig2H_dataset_onlypos <- Fig2H_dataset.backup
Fig2H_dataset_onlypos <- scGate(data = Fig2H_dataset_onlypos, model = gate_models_onlypos$level1)
colnames(Fig2H_dataset_onlypos@meta.data) <- gsub(x = colnames(Fig2H_dataset_onlypos@meta.data), pattern = "is.pure", replacement = "is.pure.level1")
colnames(Fig2H_dataset_onlypos@meta.data) <- gsub(x = colnames(Fig2H_dataset_onlypos@meta.data), pattern = "scGate_multi", replacement = "scGate_multi.level1")
colnames(Fig2H_dataset_onlypos@meta.data) <- gsub(x = colnames(Fig2H_dataset_onlypos@meta.data), pattern = "CellOntology_ID", replacement = "CellOntology_ID.level1")
colnames(Fig2H_dataset_onlypos@meta.data) <- gsub(x = colnames(Fig2H_dataset_onlypos@meta.data), pattern = "CellOntology_name", replacement = "CellOntology_name.level1")
Fig2H_dataset_onlypos <- scGate(data = Fig2H_dataset_onlypos, model = gate_models_onlypos$level2)
colnames(Fig2H_dataset_onlypos@meta.data) <- gsub(x = colnames(Fig2H_dataset_onlypos@meta.data), pattern = "^is.pure_",perl = T, replacement = "is.pure.level2_")
colnames(Fig2H_dataset_onlypos@meta.data) <- gsub(x = colnames(Fig2H_dataset_onlypos@meta.data), pattern = "^scGate_multi$", perl = T,replacement = "scGate_multi.level2")
colnames(Fig2H_dataset_onlypos@meta.data) <- gsub(x = colnames(Fig2H_dataset_onlypos@meta.data), pattern = "^CellOntology_ID$", perl = T,replacement = "CellOntology_ID.level2")
colnames(Fig2H_dataset_onlypos@meta.data) <- gsub(x = colnames(Fig2H_dataset_onlypos@meta.data), pattern = "^CellOntology_name$", perl = T,replacement = "CellOntology_name.level2")
saveRDS(object = Fig2H_dataset, file = "Fig2H_dataset_annotated_with_scGATE_pos_neg_d01.rds")
saveRDS(object = Fig2H_dataset_onlypos, file = "Fig2H_dataset_annotated_with_scGATE_pos_only_d01.rds")
# Visualize gating results
colpalette_lvl1 <- colorRampPalette(brewer.pal(12, name = "Paired"))(length(unique(Fig2H_dataset@meta.data$scGate_multi.level1)))
png(filename = paste0(plotdir,"scGATE_level1_multi_pos_neg.png"), width = 4, height = 6, units = "in", res = 300)
dittoSeq::dittoDimPlot(object = Fig2H_dataset, reduction.use = "umap.harmony.pt.tp",
var = "scGate_multi.level1", opacity = 0.8, do.label = T,labels.size = 3.5,
labels.highlight = F, labels.repel = T, size = 0.6) +
scale_color_manual(values = colpalette_lvl1) +
theme(legend.position = "bottom", plot.title = element_blank(),
axis.line = element_line(color = "black"),
axis.text = element_text(size = 12, color = "black"))
dev.off()
# Re-coding e re-finement of scGATE annotation:
# Provisional annotation: Provv_Annotation variable
Fig2H_dataset_onlypos$Provv_Annotation <- NA
for (i in 1:nrow(Fig2H_dataset_onlypos)) {
scGate_multi <- Fig2H_dataset_onlypos@meta.data[["scGate_multi.level1"]][i]
if (!is.na(scGate_multi) && scGate_multi == "Multi") {
is_pure_HSC <- Fig2H_dataset_onlypos@meta.data[["is.pure.level1_HSC"]][i]
is_pure_HSPC <- Fig2H_dataset_onlypos@meta.data[["is.pure.level1_HSPC"]][i]
is_pure_Mye_Lineage <- Fig2H_dataset_onlypos@meta.data[["is.pure.level1_Mye Lineage"]][i]
is_pure_Mk_Ery_Lineage <- Fig2H_dataset_onlypos@meta.data[["is.pure.level1_Mk-Ery Lineage"]][i]
hsc_pure <- !is.na(is_pure_HSC) && is_pure_HSC == "Pure"
hspc_pure <- !is.na(is_pure_HSPC) && is_pure_HSPC == "Pure"
mye_pure <- !is.na(is_pure_Mye_Lineage) && is_pure_Mye_Lineage == "Pure"
ery_pure <- !is.na(is_pure_Mk_Ery_Lineage) && is_pure_Mk_Ery_Lineage == "Pure"
if (!is.na(hsc_pure) && hsc_pure && !is.na(hspc_pure) && hspc_pure) {
Fig2H_dataset_onlypos$Provv_Annotation[i] <- "HSC"
} else if (!is.na(hspc_pure) && hspc_pure && !is.na(hsc_pure) && !hsc_pure) {
Fig2H_dataset_onlypos$Provv_Annotation[i] <- "HSPC"
} else if (!is.na(hsc_pure) && hsc_pure && !is.na(mye_pure) && mye_pure) {
Fig2H_dataset_onlypos$Provv_Annotation[i] <- "Undefined"
} else if (!is.na(hsc_pure) && hsc_pure && !is.na(ery_pure) && ery_pure) {
Fig2H_dataset_onlypos$Provv_Annotation[i] <- "Undefined"
} else {
Fig2H_dataset_onlypos$Provv_Annotation[i] <- "TBD"
}
} else {
if (!is.na(scGate_multi)) {
Fig2H_dataset_onlypos$Provv_Annotation[i] <- scGate_multi
} else {
Fig2H_dataset_onlypos$Provv_Annotation[i] <- "Unknown" # If NA assign Unknown
}
}
}
# Additional TBD refinement
TBD_ref <- FetchData(object = Fig2H_dataset_onlypos,
vars = c("Provv_Annotation",
grep(pattern = "is.pure.level1_", x = colnames(Fig2H_dataset_onlypos@meta.data), value = T),
"scGate_multi.level1")
)
library(tidyr)
df_unito <- unite(TBD_ref, comboannot, grep(pattern = "is.pure.level1_", x = colnames(Fig2H_dataset_onlypos@meta.data), value = T), sep = "", remove = TRUE)
df_unito$binary <- gsub(pattern = "Impure", replacement = 0, x = df_unito$comboannot)
df_unito$binary <- gsub(pattern = "Pure", replacement = 1, x = df_unito$binary)
df_unito$Provv_Annotation2 <- df_unito$Provv_Annotation # backup variable
gsub(x = colnames(TBD_ref)[2:8], pattern = "is.pure.level1_", replacement = "")
# "HSC" "HSPC" "MonoProg" "Mye Lineage" "MCP" "Mk-Ery Lineage" "Prog-Cycling"
# sharing with cycling goes to other (if single)
df_unito$Provv_Annotation2[which(df_unito$binary == "1000001" & df_unito$scGate_multi.level1 == "Multi")] <- "HSC"
df_unito$Provv_Annotation2[which(df_unito$binary == "0010001" & df_unito$scGate_multi.level1 == "Multi")] <- "MonoProg"
df_unito$Provv_Annotation2[which(df_unito$binary == "0001001" & df_unito$scGate_multi.level1 == "Multi")] <- "Mye Lineage"
df_unito$Provv_Annotation2[which(df_unito$binary == "0000101" & df_unito$scGate_multi.level1 == "Multi")] <- "MCP"
df_unito$Provv_Annotation2[which(df_unito$binary == "0000011" & df_unito$scGate_multi.level1 == "Multi")] <- "Mk-Ery Lineage"
# handling HSC + diff ---- > Undefined
df_unito$Provv_Annotation2[which(df_unito$binary == "1010000" & df_unito$scGate_multi.level1 == "Multi")] <- "Undefined"
df_unito$Provv_Annotation2[which(df_unito$binary == "1010001" & df_unito$scGate_multi.level1 == "Multi")] <- "Undefined"
df_unito$Provv_Annotation2[which(df_unito$binary == "1001000" & df_unito$scGate_multi.level1 == "Multi")] <- "Undefined"
df_unito$Provv_Annotation2[which(df_unito$binary == "1001001" & df_unito$scGate_multi.level1 == "Multi")] <- "Undefined"
df_unito$Provv_Annotation2[which(df_unito$binary == "1000100" & df_unito$scGate_multi.level1 == "Multi")] <- "Undefined"
df_unito$Provv_Annotation2[which(df_unito$binary == "1000101" & df_unito$scGate_multi.level1 == "Multi")] <- "Undefined"
df_unito$Provv_Annotation2[which(df_unito$binary == "1000010" & df_unito$scGate_multi.level1 == "Multi")] <- "Undefined"
df_unito$Provv_Annotation2[which(df_unito$binary == "1000011" & df_unito$scGate_multi.level1 == "Multi")] <- "Undefined"
# handling mixtures mono-ery ---- > Undefined
df_unito$Provv_Annotation2[which(df_unito$binary == "0010010" & df_unito$scGate_multi.level1 == "Multi")] <- "Undefined"
df_unito$Provv_Annotation2[which(df_unito$binary == "0001010" & df_unito$scGate_multi.level1 == "Multi")] <- "Undefined"
# MCP + Ery ---> undef
df_unito$Provv_Annotation2[which(df_unito$binary == "0000110" & df_unito$scGate_multi.level1 == "Multi")] <- "Mk-Ery Lineage"
df_unito$Provv_Annotation2[which(df_unito$binary == "0000111" & df_unito$scGate_multi.level1 == "Multi")] <- "Mk-Ery Lineage"
# Shared between myeloid lineage and MonoProg -- > myeloid lineage
df_unito$Provv_Annotation2[which(df_unito$binary == "0011000" & df_unito$scGate_multi.level1 == "Multi")] <- "MonoProg"
df_unito$Provv_Annotation2[which(df_unito$binary == "0011001" & df_unito$scGate_multi.level1 == "Multi")] <- "MonoProg"
df_unito$RefinedAnnotation <- df_unito$Provv_Annotation2
mymeta <- df_unito[,"RefinedAnnotation", drop = F]
Fig2H_dataset_onlypos <- AddMetaData(object = Fig2H_dataset_onlypos, metadata = mymeta)
# Dittoseq with final annotation
pleng <- length(unique(Fig2H_dataset_onlypos@meta.data$RefinedAnnotation))
colpalette_refannots <- sample(brewer.pal(12, name = "Paired"),pleng)
Fig2H_dataset_onlypos@meta.data$RefinedAnnotation <- factor(x = Fig2H_dataset_onlypos@meta.data$RefinedAnnotation,
levels = c("HSC","HSPC","MonoProg","Mye Lineage","MCP",
"Mk-Ery Lineage","Prog-Cycling",
"Unknown","Undefined"),
labels = c("HSC","HSPC","MonoProg","Mye Lineage","MCP",
"Mk-Ery Lineage","Prog-Cycling",
"Unk/Und","Unk/Und"))
colpalette3 <- c("#A6CEE3","#B2DF8A","#FDBF6F","#E78AC3","#FFFF99","#E31A1C","darkgrey","#C4A484")#,"#D3D3D3")
png(filename = paste0(plotdir,"scGATE_RefinedAnnotation.png"), width = 6, height = 6, units = "in", res = 300)
dittoSeq::dittoDimPlot(object = Fig2H_dataset_onlypos, reduction.use = "umap.harmony.pt.tp",
var = "RefinedAnnotation", opacity = 0.8, do.label = T,labels.size = 6,
labels.highlight = F, labels.repel = T, size = 1.5) +
scale_color_manual(values = colpalette3) +
theme(legend.position = c(0.88,0.2),
legend.title = element_blank(),
legend.key.size = unit(0.001, "in"),
legend.key.spacing = unit(0,"in"),
plot.title = element_blank(),
axis.line = element_line(color = "black"),
axis.text = element_text(size = 12, color = "black"))
dev.off()
png(filename = paste0(plotdir,"DimPlot_PatientID.png"), width = 6, height = 6, units = "in", res = 300)
dittoSeq::dittoDimPlot(object = Fig2H_dataset_onlypos, reduction.use = "umap.harmony.pt.tp",
var = "Patient", opacity = 1, do.label = T,labels.size = 3.5,
labels.highlight = F, labels.repel = T, size = 1) +
scale_color_manual(values = colpalette_refannots) +
theme(legend.position = c(0.88,0.2),
legend.title = element_blank(),
legend.key.size = unit(0.001, "in"),
legend.key.spacing = unit(0,"in"),
plot.title = element_blank(),
axis.line = element_line(color = "black"),
axis.text = element_text(size = 12, color = "black"))
dev.off()
# Adding WPRE INFO
#### WPRE expression ####
WPRE_obj <- readRDS("WPRE_obj.rds")
WPRE_raw_counts <- FetchData(object = WPRE_obj, vars = "WPRE")
Fig2H_dataset_onlypos <- AddMetaData(Fig2H_dataset_onlypos, WPRE_raw_counts)
Fig2H_dataset_onlypos@meta.data$WPRE_condition <- ifelse(Fig2H_dataset_onlypos@meta.data["WPRE"] > 0, "WPRE-POS", "WPRE-NEG")
Fig2H_dataset_onlypos@meta.data$WPRE_condition[is.na(Fig2H_dataset_onlypos@meta.data$WPRE_condition)] <- "WPRE-NEG"
umap_wpre <- dittoSeq::dittoDimPlot(object = Fig2H_dataset_onlypos, reduction.use = "umap.harmony.pt.tp",order = "increasing",
var = "WPRE_condition", opacity = 0.8, do.label = F,labels.size = 4,
labels.highlight = F, labels.repel = T, size = 1) +
scale_color_manual(values = c("#A0A0A0B3","#FF3333B3")) +
xlab("UMAP_1") +
ylab(label = "UMAP_2") +
theme(legend.position = "none",# c(0.16,0.9),
legend.title = element_blank(),
legend.key.size = unit(0.001, "in"),
legend.key.spacing = unit(0,"in"),
plot.title = element_blank(),
#axis.line = element_line(color = "black"),
axis.text = element_text(size = 16)
)
# Figure 2I
png(filename = paste0(plotdir,"WPRE_positivity.png"), width = 4, height = 4, units = "in", res = 300)
umap_wpre
dev.off()
pdf(file = paste0(plotdir,"WPRE_positivity.pdf"), width = 4, height = 4)
umap_wpre
dev.off()
library(plyr)
# Create an empty list to store plots
plots_list <- list()
data_list <- list()
data_list_freq <- list()
# Loop through each unique patient
for(patient in unique(Fig2H_dataset_onlypos@meta.data$Patient)) {
# Filter the data for the current patient
patient_data <- Fig2H_dataset_onlypos@meta.data[Fig2H_dataset_onlypos@meta.data$Patient == patient, ]
patient_data$Timepoint <- factor(patient_data$Timepoint, levels = c("dp","day4","day7"))
# Initialize a list to store results for each population within the patient
WPRE_in_each_pop <- list()
WPRE_in_each_pop_numbers <- list()
# Loop through each unique population in the patient-specific metadata
for(pops in unique(patient_data$RefinedAnnotation)) {
# Create the table for each population and calculate proportions
WPRE_in_each_pop[[pops]] <- as.data.frame(round(prop.table(table(patient_data[patient_data$RefinedAnnotation == pops,]$WPRE_condition,
patient_data[patient_data$RefinedAnnotation == pops,]$Timepoint), 2)*100,2))
WPRE_in_each_pop_numbers[[pops]] <- as.data.frame(table(patient_data[patient_data$RefinedAnnotation == pops,]$WPRE_condition,
patient_data[patient_data$RefinedAnnotation == pops,]$Timepoint))
WPRE_in_each_pop_numbers[[pops]]$CellPopulation <- pops
# Add the population (cluster) label to the dataframe
WPRE_in_each_pop[[pops]]$CellPopulation <- pops
}
# Combine results across populations for the current patient
df_WPRE_res_num <- plyr::ldply(WPRE_in_each_pop_numbers, data.frame)
df_WPRE_res_num$Patient <- patient
df_WPRE_res <- plyr::ldply(WPRE_in_each_pop, data.frame)
df_WPRE_res$Patient <- patient
print(head(df_WPRE_res))
# Generate the plot for this patient
data_list_freq[[patient]] <- df_WPRE_res
data_list[[patient]] <- df_WPRE_res_num
}
# Data file S3 sheets
saveRDS(object = data_list_freq, file = "WPRE_freqs_data.rds")
saveRDS(object = data_list, file = "WPRE_ncells_data.rds")
write.xlsx(x = data_list_freq, file = "WPRE_freqs_data.xlsx")
write.xlsx(x = data_list, file = "WPRE_ncells_data.xlsx")
saveRDS(object = Fig2H_dataset.rds_onlypos, "Fig2H_dataset_scGATE_WPRE.rds")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment