Commit b914edca authored by Monah Abou Alezz's avatar Monah Abou Alezz
Browse files

fix GSVA

parent aee89bcf
# ========================================================- # ========================================================-
# Title: GSVA analysis on scRNAseq and snRNAseq samples # Title: GSVA Analysis on scRNAseq and snRNAseq datasets
# Description: Code to generate SuppFig13 # Description: Code to generate SuppFig13
# Author: Monah Abou Alezz # Author: Monah Abou Alezz
# Date: 2025-03-06 # Date: 2025-03-06
# ========================================================- # ========================================================-
suppressPackageStartupMessages(library(Seurat)) suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(tidyverse)) suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(openxlsx))
suppressPackageStartupMessages(library(patchwork)) suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(pheatmap)) suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(GSVA)) suppressPackageStartupMessages(library(GSVA))
suppressPackageStartupMessages(library(cogena))
suppressPackageStartupMessages(library(purrr)) suppressPackageStartupMessages(library(purrr))
suppressPackageStartupMessages(library(presto)) suppressPackageStartupMessages(library(presto))
suppressPackageStartupMessages(library(DESeq2)) suppressPackageStartupMessages(library(DESeq2))
suppressPackageStartupMessages(library(cogena)) suppressPackageStartupMessages(library(org.Hs.eg.db))
suppressPackageStartupMessages(library(org.Mm.eg.db))
#### loading the hallmark gmt file as a list using the gmt2list function from cogena ## load hallmark objects
gs_human <- gmt2list("~/databases/GSEA/GSEA_human_v7.2/h.all.v7.2.symbols.gmt") gs_human <- gmt2list("data/h.all.v7.2.symbols.gmt")
gs_mouse <- gmt2list("~/databases/GSEA/GSEA_mouse_v7.2/h.all.v7.2.symbols_mouse.gmt") gs_mouse <- gmt2list("data//h.all.v7.2.symbols_mouse.gmt")
## load datasets
## loading datasets
mouse.combined <- readRDS("data/mouse_striatum_snRNA_AAV9.rds") mouse.combined <- readRDS("data/mouse_striatum_snRNA_AAV9.rds")
unique(mouse.combined$Anno) Idents(mouse.combined) <- "Cell_Category"
mouse.combined <- subset(mouse.combined, Anno == "Astrocytes" | d4_final <- readRDS("data/mixed_d4_final.rds")
Anno == "Mature_Oligos" | d4_aav9_ut <- subset(d4_final, orig.ident %in% c("AAV9", "UT"))
Anno == "Immature_Oligos" | d4_aav9_ut@meta.data[['orig.ident']] <- recode_factor(d4_aav9_ut@meta.data[["orig.ident"]],
Anno == "Excitatory" | "AAV9" = "d4_AAV9",
Anno == "Inhibitory") "UT" = "d4_UT")
mouse.combined@meta.data[['Anno']] <- recode_factor( d4_aav9_ut@meta.data[["Cell_Category"]] <- recode_factor(
mouse.combined@meta.data[["Anno"]], d4_aav9_ut@meta.data[["Cell_Type"]],
"Astrocytes" = "Astrocytes", "Undifferentiated" = "Undifferentiated",
"Mature_Oligos" = "Oligodendrocytes", "Oligodendrocytes" = "d4_Oligodendrocytes",
"Immature_Oligos" = "Oligodendrocytes", "OPC" = "d4_Oligodendrocytes",
"Excitatory" = "Neurons", "Astrocytes" = "d4_Astrocytes",
"Inhibitory" = "Neurons") "Immature.Astrocytes" = "d4_Astrocytes",
mouse.combined$Anno <- droplevels(mouse.combined$Anno) "Neurons" = "d4_Neurons",
unique(mouse.combined$Anno) "Immature.Neurons" = "d4_Neurons")
mouse.combined$CellType <- mouse.combined$Anno d4_aav9_ut$Cell_Category <- factor(d4_aav9_ut$Cell_Category,
d4_1707 <- readRDS("data/mixed_D4_final.rds")
d4_1707@meta.data[['orig.ident']] <- recode_factor(
d4_1707@meta.data[["orig.ident"]],
"Sample2" = "AAV2",
"Sample4" = "d4_AAV9",
"Sample6" = "Spk",
"Sample8" = "d4_UT")
d4_1707@meta.data[["RNA_snn_h.orig.ident_res.1.2"]] <- recode_factor(
d4_1707@meta.data[["RNA_snn_h.orig.ident_res.1.2"]],
"0" = "Undifferentiated",
"1" = "d4_Oligodendrocytes",
"2" = "d4_Neurons",
"3" = "d4_Astrocytes",
"4" = "d4_Neurons",
"5" = "d4_Oligodendrocytes",
"6" = "d4_Astrocytes",
"7" = "d4_Oligodendrocytes",
"8" = "d4_Oligodendrocytes",
"9" = "d4_Neurons",
"10" = "d4_Oligodendrocytes",
"11" = "d4_Oligodendrocytes",
"12" = "d4_Astrocytes",
"13" = "d4_Astrocytes",
"14" = "d4_Oligodendrocytes",
"15" = "d4_Astrocytes",
"16" = "d4_Astrocytes")
Idents(d4_1707) <- "RNA_snn_h.orig.ident_res.1.2"
d4_1707$RNA_snn_h.orig.ident_res.1.2 <- factor(d4_1707$RNA_snn_h.orig.ident_res.1.2,
levels = c("d4_Astrocytes", levels = c("d4_Astrocytes",
"d4_Neurons", "d4_Neurons",
"d4_Oligodendrocytes", "d4_Oligodendrocytes",
"Undifferentiated")) "Undifferentiated"))
d4_1707 <- subset(d4_1707, RNA_snn_h.orig.ident_res.1.2 != "Undifferentiated") d4_aav9_ut <- subset(d4_aav9_ut, Cell_Category != "Undifferentiated")
d4_1707$RNA_snn_h.orig.ident_res.1.2 <- droplevels(d4_1707$RNA_snn_h.orig.ident_res.1.2) d4_aav9_ut$Cell_Category <- droplevels(d4_aav9_ut$Cell_Category)
unique(d4_1707$RNA_snn_h.orig.ident_res.1.2) Idents(d4_aav9_ut) <- "Cell_Category"
d4_1707 <- subset(d4_1707, orig.ident == "d4_AAV9" | orig.ident == "d4_UT") d5_final <- readRDS("data/organoids_d5_final.rds")
d4_1707$orig.ident <- droplevels(d4_1707$orig.ident) d5_aav9_ut <- subset(d5_final, orig.ident %in% c("AAV9", "UT"))
d4_1707$CellType <- d4_1707$RNA_snn_h.orig.ident_res.1.2 d5_aav9_ut@meta.data[['orig.ident']] <- recode_factor(d5_aav9_ut@meta.data[["orig.ident"]],
"AAV9" = "d5_AAV9",
d5_1743 <- readRDS("data/organoids_D5_final.rds") "UT" = "d5_UT")
d5_1743@meta.data[['orig.ident']] <- recode_factor( d5_aav9_ut@meta.data[["Cell_Category"]] <- recode_factor(
d5_1743@meta.data[["orig.ident"]], d5_aav9_ut@meta.data[["Cell_Type"]],
"Sample2" = "AAV2", "Undifferentiated" = "Undifferentiated",
"Sample4" = "d5_AAV9", "Oligodendrocytes" = "d5_Oligodendrocytes",
"Sample6" = "Spk", "OPC" = "d5_Oligodendrocytes",
"Sample8" = "d5_UT") "Astrocytes" = "d5_Astrocytes",
d5_1743@meta.data[["RNA_snn_h.orig.ident_res.0.6"]] <- recode_factor( "Immature.Astrocytes" = "d5_Astrocytes",
d5_1743@meta.data[["RNA_snn_h.orig.ident_res.0.6"]], "Neurons" = "d5_Neurons",
"0" = "d5_Astrocytes", "Immature.Neurons" = "d5_Neurons")
"1" = "d5_Neurons", d5_aav9_ut$Cell_Category <- factor(d5_aav9_ut$Cell_Category,
"2" = "Undifferentiated",
"3" = "d5_Neurons",
"4" = "d5_Oligodendrocytes",
"5" = "Pericytes",
"6" = "Pericytes",
"7" = "d5_Astrocytes",
"8" = "d5_Oligodendrocytes",
"9" = "d5_Neurons",
"10" = "d5_Astrocytes",
"11" = "Pericytes",
"12" = "d5_Astrocytes",
"13" = "d5_Neurons")
d5_1743 <- subset(d5_1743, RNA_snn_h.orig.ident_res.0.6 != "Pericytes")
Idents(d5_1743) <- "RNA_snn_h.orig.ident_res.0.6"
d5_1743$RNA_snn_h.orig.ident_res.0.6 <- factor(d5_1743$RNA_snn_h.orig.ident_res.0.6,
levels = c("d5_Astrocytes", levels = c("d5_Astrocytes",
"d5_Neurons", "d5_Neurons",
"d5_Oligodendrocytes", "d5_Oligodendrocytes",
"Undifferentiated")) "Undifferentiated"))
d5_1743 <- subset(d5_1743, RNA_snn_h.orig.ident_res.0.6 != "Undifferentiated") d5_aav9_ut <- subset(d5_aav9_ut, Cell_Category != "Undifferentiated")
d5_1743$RNA_snn_h.orig.ident_res.0.6 <- droplevels(d5_1743$RNA_snn_h.orig.ident_res.0.6) d5_aav9_ut$Cell_Category <- droplevels(d5_aav9_ut$Cell_Category)
unique(d5_1743$RNA_snn_h.orig.ident_res.0.6) Idents(d5_aav9_ut) <- "Cell_Category"
d5_1743 <- subset(d5_1743, orig.ident == "d5_AAV9" | orig.ident == "d5_UT")
d5_1743$orig.ident <- droplevels(d5_1743$orig.ident)
d5_1743$CellType <- d5_1743$RNA_snn_h.orig.ident_res.0.6
## create a pseudobulk and normalize ## create a pseudobulk and normalize
datasets_pseudo <- c("mouse.combined", "d4_1707", "d5_1743") datasets_pseudo <- c("mouse.combined", "d4_aav9_ut", "d5_aav9_ut")
for (i in datasets_pseudo) { for (i in datasets_pseudo) {
print(paste0("Analyzing... ",i))
obj_i <- get(i) obj_i <- get(i)
data_collapsed <- presto::collapse_counts(obj_i@assays$RNA@counts, data_collapsed <- presto::collapse_counts(obj_i@assays$RNA@counts,
obj_i@meta.data, obj_i@meta.data,
c('CellType', 'orig.ident')) c('Cell_Category', 'orig.ident'))
meta_data<- data_collapsed$meta_data meta_data<- data_collapsed$meta_data
mat <- data_collapsed$counts_mat mat <- data_collapsed$counts_mat
colnames(mat)<- paste(meta_data$CellType, meta_data$orig.ident, sep="_") colnames(mat)<- paste(meta_data$Cell_Category, meta_data$orig.ident, sep="_")
rownames(meta_data) <- paste(meta_data$CellType, meta_data$orig.ident, sep="_") rownames(meta_data) <- paste(meta_data$Cell_Category, meta_data$orig.ident, sep="_")
pseudo_bulk_obj <- list(mat = mat, meta_data = meta_data) pseudo_bulk_obj <- list(mat = mat, meta_data = meta_data)
assign(paste0(i, "_mat"), mat) assign(paste0(i, "_mat"), mat)
assign(paste0(i,"_pseduo_bulk_obj"), pseudo_bulk_obj) assign(paste0(i,"_pseduo_bulk_obj"), pseudo_bulk_obj)
...@@ -136,20 +87,17 @@ for (i in datasets_pseudo) { ...@@ -136,20 +87,17 @@ for (i in datasets_pseudo) {
DESeq.rlog <- rlogTransformation(DESeq.ds, blind = TRUE) DESeq.rlog <- rlogTransformation(DESeq.ds, blind = TRUE)
counts <- assay(DESeq.rlog) counts <- assay(DESeq.rlog)
assign(paste0(i,"_counts"), counts) assign(paste0(i,"_counts"), counts)
if (i != "mouse.combined") {
counts_es <- ExpressionSet(assayData = as.matrix(counts), annotation = org.Hs.eg.db)
} else {
counts_es <- ExpressionSet(assayData = as.matrix(counts), annotation = org.Mm.eg.db)
}
assign(paste0(i,"_es"), counts_es)
} }
## converting matrices into expressionsets ## using matrices
d4_1707_counts_es <- ExpressionSet(assayData = as.matrix(d4_1707_counts), gsva_d4 <- gsva(as.matrix(d4_aav9_ut_counts), gs_human, min.sz=5, max.sz=500)
annotation = "org.Hs.eg.db") gsva_d5 <- gsva(as.matrix(d5_aav9_ut_counts), gs_human, min.sz=5, max.sz=500)
d5_1743_counts_es <- ExpressionSet(assayData = as.matrix(d5_1743_counts), gsva_mouse <- gsva(as.matrix(mouse.combined_counts), gs_mouse, min.sz=5, max.sz=500)
annotation = "org.Hs.eg.db")
mouse_counts_es <- ExpressionSet(assayData = as.matrix(mouse.combined_counts),
annotation = "org.Mm.eg.db")
gsva_d4_1707_counts <- gsva(as.matrix(d4_1707_counts), gs_human, min.sz=5, max.sz=500)
gsva_d5_1743_counts <- gsva(as.matrix(d5_1743_counts), gs_human, min.sz=5, max.sz=500)
gsva_mouse.combined_counts <- gsva(as.matrix(mouse.combined_counts),
gs_mouse, min.sz=5, max.sz=500)
gsva_counts_combined <- cbind(gsva_d4_1707_counts, gsva_d5_1743_counts, gsva_mouse.combined_counts) gsva_counts_combined <- cbind(gsva_d4_1707_counts, gsva_d5_1743_counts, gsva_mouse.combined_counts)
p <- pheatmap(gsva_counts_combined) p <- pheatmap(gsva_counts_combined)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment