diff --git a/HTO_demux.R b/HTO_demux.R new file mode 100644 index 0000000000000000000000000000000000000000..ddf127823f3f30709bd723aceaa2f1be3e412d3c --- /dev/null +++ b/HTO_demux.R @@ -0,0 +1,434 @@ +library(scales) +library(viridis) +library(Seurat) +library(stringr) +library(tibble) +library(dplyr) +library(patchwork) +library(ggplot2) +library(tidyr) +library(RColorBrewer) +library(openxlsx) +library(cowplot) + +args <- commandArgs(trailingOnly = TRUE) + + +######################################################################################### +# KEEP ONLY BARCODE USED IN EXPERIMENT FROM FILE, NORMALIZE HTO AND DEMUX +######################################################################################### +filterbarcodes <- function(object_seurat,i,marg.norm) { + cat('## FILTER BACRODES') + counts <- GetAssayData(object_seurat, slot="counts", assay="HTO") + cat('number of cells for each HTO ') + cat(rowSums(data.frame(counts))) + + hto.filter <- str_split(hto_conditions[hto_conditions$sample == sampleid, 'HTO'], ',')[[1]] #hto_conditions[[sampleid]] + counts.sub <- counts[hto.filter,] + + ##filter and update number of cells per sample + hto.assay <- CreateSeuratObject(counts=counts.sub) + object_seurat@assays$HTO <- NULL ### to use if you want to subset HTO + object_seurat@assays$HTO <- CreateAssayObject(counts = counts.sub) ### ### to use if you want to subset HTO + object_seurat@assays$HTO@key <- 'hto_' + cat('Normalize HTO data') + scrna.hashtag <- NormalizeData(object_seurat, assay = "HTO",normalization.method = "CLR", margin = marg.norm) + scrna.hashtag <- HTODemux(scrna.hashtag, assay = "HTO", positive.quantile = i, seed = 42) + + ## rerturn list of barcodes and obj demultiplexed + return(list(sampleid, + obj_demux=scrna.hashtag, + barcodes=rownames(scrna.hashtag@assays$HTO))) + +} + + +######################################################################################### +# CHOOSE POSITIVE QUANTILE PARAMETER +######################################################################################### +eval_pq <- function(out_dir, obj.combined, sampleid, library_id, min_feature, max_feature, mito.prefix, max_pc_mito, marg.norm) { +eval_pq_out_list <- list() + + #cat(sampleid) + eval_pq_out <- tibble() + #obj.combined = readRDS(paste(folder,sampleid,paste0(sampleid,'_minimal.rds'), sep='/')) + + DefaultAssay(obj.combined) <- 'RNA' + print(DefaultAssay(obj.combined)) + + cat('filter cells', mito.prefix) + # Compute mito % + obj.combined[["percent.mt"]] <- PercentageFeatureSet(obj.combined, assay = 'RNA', pattern = mito.prefix) + obj <- obj.combined %>% + subset( + nFeature_RNA > min_feature & ### Remove cells with < 250 detected genes + nFeature_RNA < max_feature & ### Remove cells with > 2500 detected genes (could be doublets) + percent.mt < max_pc_mito ### Remove cells with > 0.15 mito/total reads + ) + +# filtering object based on hto count to zero +DefaultAssay(obj) <- 'HTO' +print(DefaultAssay(obj)) +obj <- subset(obj,subset = nCount_HTO != 0 ) + +cat('find pq') +for (i in seq(from = 0.9, to = 0.999999999, by = 0.005)) { + cat(' demultiplexing cells based on HTO') + + x <- filterbarcodes(obj, i, marg.norm) + + + object <- x$obj_demux + cat('save pq') + HTO_classification <- as.data.frame.matrix(table( + object@meta.data$HTO_maxID, + object@meta.data$HTO_classification.global + )) %>% + summarise(max.singlet = sum(Singlet)) %>% + mutate(threshold = i) + + eval_pq_out <- bind_rows(eval_pq_out, HTO_classification) %>% + mutate(library = sampleid) +} +eval_pq_out_list <- rbind( eval_pq_out_list, eval_pq_out) + +result_list <- list( +"eval_pq_out" = eval_pq_out_list +) + +return(result_list) +} + +obj_minimal <- args[1] # Sample minimal object +obj_combined <- readRDS(obj_minimal) +out_dir <- args[2] # Path to output result folder (results) +sampleid <- args[3] #Sample id +#min_cells <- as.numeric(args[4]) +min_feature <- as.numeric(args[4]) +max_feature <- as.numeric(args[5]) +mito.prefix = args[6] +max_pc_mito <- as.numeric(args[7]) +marg.norm <- as.numeric(args[8]) +hto <- args[9] +cat('########## mito prefix #######') +cat(mito.prefix) +hto_conditions <- read.table(hto, header = TRUE, stringsAsFactors = FALSE) # Table with HTO list per sample + +## Evaluate positive quantile parameter +eval_pq_lib1 <- eval_pq(out_dir, obj_combined, sampleid, hto_conditions, min_feature, max_feature, mito.prefix, max_pc_mito, marg.norm) + +## Plot number of Singlet detected +pq <- eval_pq_lib1$eval_pq_out %>% + ggplot(aes(x = threshold, y = max.singlet, fill = library, color = library)) + + geom_line() + + geom_point() + + scale_color_brewer(palette = "Set1", direction = -1) + + labs(x = "Postive Quantile", y = "# Singlet cells", fill = "", color = "") + + theme_bw() + + +out_dir_sample <- paste(out_dir, sampleid, 'plots',sep = "/") +dir.create(out_dir_sample, showWarnings = FALSE) + +png(filename = paste(out_dir_sample, paste0(sampleid,"_max_threshod_pq_margin_",as.character(marg.norm),'.png'), sep = '/'), + width = 12, height = 9, units = "in", res = 96) +print(pq) +dev.off() + +## save threshold +dx <- data.frame(eval_pq_lib1$eval_pq_out) +dx$library <- as.factor(dx$library) +max_threshold <- merge(aggregate(max.singlet ~ library, data=dx, max), dx, all.x=T) +write.table(max_threshold,paste(out_dir_sample, paste0(sampleid,"_max_threshod_pq_margin_",as.character(marg.norm),".txt"), sep = '/')) + +# ##################################################################################### +# # After computing best pq with higher number of singlets perform demux with max value +# ##################################################################################### +cat(sampleid) +maxi <- max_threshold[max_threshold$library == sampleid, 'threshold'] + +# sample minimal object +obj <- readRDS(obj_minimal) + +### Add mitochondrial percentage to meta.data table +DefaultAssay(obj) <- 'RNA' +print(DefaultAssay(obj)) + +# Compute mito % +obj[["percent.mt"]] <- PercentageFeatureSet(obj, assay = 'RNA', pattern = mito.prefix) +obj <- obj %>% + subset( + nFeature_RNA > min_feature & ### Remove cells with < 250 detected genes + nFeature_RNA < max_feature & ### Remove cells with > 2500 detected genes (could be doublets) + percent.mt < max_pc_mito ### Remove cells with > 0.15 mito/total reads + ) + +# filtering object based on hto count to zero +DefaultAssay(obj) <- 'HTO' + +print(DefaultAssay(obj)) +obj <- subset(obj,subset = nCount_HTO != 0 ) + +table(obj$orig.ident) + +cat('Demultiplexing cells based on HTO') + +x <- filterbarcodes(obj, maxi, marg.norm) + +scrna.hashtag <- x$obj_demux +rowSums(data.frame(scrna.hashtag@assays$HTO@counts)) + +# Save demux Seurat object +#oiut_dir_sample <- paste(out_dir, sampleid, sep = "/") +#dir.create(out_dir_sample, showWarnings = FALSE) +DefaultAssay(scrna.hashtag) <- "RNA" +out_dir <- paste(out_dir, sampleid, sep = '/') +saveRDS(object = scrna.hashtag, file = paste(out_dir, paste(sampleid, "demux_minimal.rds", sep = "_"), sep = "/")) + +cat(x$barcodes) + +cat('visualize demultiplexing - Global classification results', sampleid) +class_hto <- list() +class_hto[[sampleid]] <- table(scrna.hashtag$HTO_classification) +class_hto[[sampleid]] + +class_htoglobal <- list() +class_htoglobal[[sampleid]] <- table(scrna.hashtag$HTO_classification.global) +class_htoglobal[[sampleid]] + +cat('Group cells based on the max HTO signal') +Idents(scrna.hashtag) <- "HTO_maxID" +DefaultAssay(scrna.hashtag) <- "HTO" +rdgplot <- scrna.hashtag %>% + RidgePlot( + assay = "HTO", + features = rownames(scrna.hashtag), + ncol = 2) + +png(filename = paste(out_dir_sample, paste0(sampleid,"_ridgeplot.png"), sep = '/'), + width = 12, height = 9, units = "in", res = 96) +print(rdgplot) +dev.off() + +### Calculate doublet rate +scrna.hashtag@meta.data %>% + group_by(hash.ID) %>% + summarize(fract = n() / nrow(.)) +### Create bar graphs comparing cell count for each sample +HTO_bars_1 <- scrna.hashtag@meta.data %>% + rownames_to_column("cell_id") %>% + ggplot(aes(hash.ID, fill = hash.ID)) + + geom_bar() + + labs(y = "Cell Count") + + cowplot::theme_cowplot() + + theme( + legend.position = "none", + axis.title.x = element_blank(), + axis.text.x = element_text(hjust = 1, angle = 45) + ) +### Create stacked bar graph showing fraction of cells +HTO_bars_2 <- scrna.hashtag@meta.data %>% + rownames_to_column("cell_id") %>% + group_by(hash.ID) %>% + summarize(hash_frac = n() / nrow(.)) %>% + ggplot(aes("scRNA-seq", hash_frac, fill = hash.ID)) + + geom_bar(stat = "identity", color = "white", size = 0.5) + + labs(y = "Fraction of Cells", x= sampleid) + + cowplot::theme_cowplot() + + theme( + legend.title = element_blank(), + axis.title.x = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank() + ) + ### Combine plots + ### now add the title + # title <- ggdraw() + + # draw_label( + # sampleid, + # fontface = 'bold', + # x = 0, + # hjust = 0 + # ) + + # theme( + # ### add margin on the left of the drawing canvas, + # ### so title is aligned with left edge of first plot + # plot.margin = margin(0, 0, 0, 7) + # ) +x <- plot_grid(#title, + HTO_bars_1, HTO_bars_2, + nrow = 1, + rel_widths = c(0.5, 0.5), + rel_heights = c(1, 0.8) + ) + +png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_bars.png"), sep = '/'), + width = 12, height = 9, units = "in", res = 96) +print(x) +dev.off() + +# cell count by hto classification +scrna.hashtag@meta.data %>% + group_by(HTO_classification) %>% + summarize(fract = n() / nrow(.)) + +### Create bar graphs comparing cell count by HTO classification +HTO_bars_3 <- scrna.hashtag@meta.data %>% + rownames_to_column("cell_id") %>% + ggplot(aes(HTO_classification, fill = HTO_classification)) + + geom_bar() + + labs(y = "Cell Count") + + cowplot::theme_cowplot() + + theme( + legend.position = "none", + axis.title.x = element_blank(), + axis.text.x = element_text(hjust = 1, angle = 45) + ) +### Create stacked bar graph showing fraction of cells +HTO_bars_4 <- scrna.hashtag@meta.data %>% + rownames_to_column("cell_id") %>% + group_by(HTO_classification) %>% + summarize(hash_frac = n() / nrow(.)) %>% + ggplot(aes("scRNA-seq", hash_frac, fill = HTO_classification)) + + geom_bar(stat = "identity", color = "white", size = 0.5) + + labs(y = "Fraction of Cells", x= sampleid) + + cowplot::theme_cowplot() + + theme( + legend.title = element_blank(), + axis.title.x = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank() + ) +### Combine plots +x <- plot_grid(#title, + HTO_bars_3, HTO_bars_4, + nrow = 1, + rel_widths = c(0.5, 0.5), + rel_heights = c(1, 0.8) + ) + +png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_bars_classification.png"), sep = '/'), + width = 12, height = 9, units = "in", res = 96) +print(x) +dev.off() + +fs <- FeatureScatter(scrna.hashtag, feature1 = "hto_H1", feature2 = "hto_H2") + +png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_1_2_featurescatter.png"), sep = '/'), + width = 12, height = 9, units = "in", res = 96) +print(fs) +dev.off() + + +Idents(scrna.hashtag) <- "HTO_classification.global" +vnlp <- VlnPlot(scrna.hashtag, features = "nCount_RNA", pt.size = 0.1, log = TRUE) + +png(filename = paste(out_dir_sample, paste0(sampleid,"_ncount_RNA.png"), sep = '/'), + width = 12, height = 9, units = "in", res = 96) +print(vnlp) +dev.off() + +cat('remove negative cells from the object') +scrna.hashtag.subset <- subset(scrna.hashtag, idents = "Negative", invert = TRUE) + +cat('Calculate a tSNE embedding of the HTO data') +DefaultAssay(scrna.hashtag.subset) <- "HTO" +scrna.hashtag.subset <- ScaleData(scrna.hashtag.subset, features = rownames(scrna.hashtag.subset), + verbose = FALSE) +scrna.hashtag.subset <- RunPCA(scrna.hashtag.subset, features = rownames(scrna.hashtag.subset), approx = FALSE) + +# Calculate a distance matrix using HTO +hto.dist.mtx <- as.matrix(dist(t(GetAssayData(object = scrna.hashtag.subset, assay = "HTO")))) + +scrna.hashtag.subset <- RunTSNE(scrna.hashtag.subset, dims = 1:length( hto_conditions[[sampleid]]), perplexity = 100,distance.matrix = hto.dist.mtx, + check_duplicates = FALSE) + + +Idents(scrna.hashtag.subset) <- 'HTO_classification' +pdim_hto <- DimPlot(scrna.hashtag.subset,label=TRUE) + +png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_dimplot.png"), sep = '/'), + width = 12, height = 9, units = "in", res = 96) +print(pdim_hto) +dev.off() + +Idents(scrna.hashtag.subset) <- 'HTO_classification.global' +pdim_hto_cl <- DimPlot(scrna.hashtag.subset,label=TRUE) +png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_global_dimplot.png"), sep = '/'), + width = 12, height = 9, units = "in", res = 96) +print(pdim_hto_cl) +dev.off() + +htohm <- HTOHeatmap(scrna.hashtag.subset, assay = "HTO", ncells = 5000) +png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_heatmap.png"), sep = '/'), + width = 12, height = 9, units = "in", res = 96) +print(htohm) +dev.off() + + +## count all barcodes +cat('count HTO reads') +count_HTO_reads <- list() +count_HTO_reads[[sampleid]] <- rowSums(data.frame(obj@assays$HTO@counts)) +count_HTO_reads[[sampleid]] + +###cat('Extract the singlets') +scrna.hashtag.singlet <- subset(scrna.hashtag.subset, idents = "Singlet") +count_HTO_reads_singlet <- list() +count_HTO_reads_singlet[[sampleid]] <- rowSums(data.frame(scrna.hashtag.singlet@assays$HTO@counts)) +cat('count HTO singlet reads\n') +count_HTO_reads_singlet[[sampleid]] + + +Idents(scrna.hashtag.singlet) <- "HTO_classification" +count_HTO_reads_singlet_HTO <- list() +count_HTO_reads_singlet_HTO_mean <- list() +for (n in rownames(scrna.hashtag.singlet@assays$HTO@counts)){ + x <- subset(scrna.hashtag.singlet, idents = n ) + cat('hto', n) + print(rowSums(data.frame(x@assays$HTO@counts))) + ## count of reads HTO in cell classified as specific HTO + count_HTO_reads_singlet_HTO[[sampleid]][n] <- rowSums(data.frame(x@assays$HTO@counts))[n] + ## MEAN read count per hashatg over cell classified as specific-HTO + count_HTO_reads_singlet_HTO_mean[[sampleid]][n] <- unname(rowSums(data.frame(x@assays$HTO@counts))[n])/unname(class_hto[[sampleid]][n]) + } + +cat('count HTO reads, only in cell classified as singlet for specific HTO\n') +count_HTO_reads_singlet_HTO + +cat('mean HTO reads, only in cell classified as singlet for specific HTO\n') +count_HTO_reads_singlet_HTO_mean + +Idents(scrna.hashtag.subset) <- 'HTO_classification.global' +### Projecting singlet identities on TSNE visualization + +dimpl_htocl <- DimPlot(scrna.hashtag.singlet, group.by = "HTO_classification",label=TRUE) +png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_singlets.png"), sep = '/'), + width = 12, height = 9, units = "in", res = 96) +dimpl_htocl +dev.off() + +# dimpl_htoglobal <- DimPlot(scrna.hashtag.singlet, group.by = 'HTO_classification.global', label=TRUE) +# png(filename = paste(out_dir_sample, paste0(sampleid,"_HTO_classification_global_singlets.png"), sep = '/'), +# width = 12, height = 9, units = "in", res = 96) +# print(dimpl_htoglobal) +# dev.off() + +vnl_hto <- scrna.hashtag.singlet %>% + VlnPlot( + features = c( "nCount_HTO"), + ncol = 1, + pt.size = 0.0005, + log = T + ) +png(filename = paste(out_dir_sample, paste0(sampleid,"_violin_hto_singlets.png"), sep = '/'), + width = 12, height = 9, units = "in", res = 96) +print(vnl_hto) +dev.off() + + +## Save Singlets cell and HTO classification in meta data +DefaultAssay(scrna.hashtag) <- "RNA" +scrna.hashtag[['HTO']] <- NULL # eliminate HTO assay +saveRDS(object = scrna.hashtag.singlet, file = paste(out_dir, paste(sampleid, "singlet_minimal.rds", sep = "_"), sep = "/")) +