Commit b8e96e20 authored by Matteo Barcella's avatar Matteo Barcella
Browse files

Code, input and output for reproducing RNAseq images in manuscript

parent 5e75a2d5
# code for reproducing figures in paper
# AIM: create a dotplot / heatmap with genes of interest present in the attachment
# facets by category of interest shown in the excel
# loading libraries ####
library(ggplot2)
library(openxlsx)
library(ggrepel)
library(DESeq2)
library(reshape2)
library(RColorBrewer)
library(scales)
# loading genelist file and create a dataframe ####
mydata <- read.xlsx(xlsxFile = "GeneList_Fig2A.xlsx")
mydata$Cluster[2:4] <- "Integrins"
mydata$Cluster[6:12] <- "Actin regulators"
mydata$Cluster[14:21] <- "Tubulin regulators"
mydata$Cluster[23:28] <- "Focal adhesion"
mydata$Cluster[30:32] <- "Notch signaling"
mydata$Cluster[34:35] <- "WNT signaling"
mydata$Cluster[37:45] <- "HSC"
# remove double entry AKT3
mydata <- subset(mydata, Gene != "AKT3.1")
mydata$Cluster <- gsub(pattern = "HSC", replacement = "HSC related", x = mydata$Cluster)
saveRDS(object = mydata, file = "Gene_list.rds")
# read data analysis input ####
myexpress <- readRDS("20240208_095535_DGE.rds")
myexpress.bck <- myexpress
dge_res <- myexpress$deseq$dge_res
DESeq.rlog <- rlogTransformation(myexpress$deseq$ds, blind = TRUE)
colannotations <- myexpress$deseq$sample_info
# subsetting matrix of rlog counts according to genes of interest ####
normcounts <- DESeq2::counts(object = myexpress$deseq$ds, normalized = T)
normacounts_NE <- normcounts[,rownames(colannotations)]
goi_expr <- as.matrix(normacounts_NE[mydata$Gene,])
## Version with means ####
goi_expr_means_melt <- melt(goi_expr)
goi_expr_means_melt_annot <- merge.data.frame(goi_expr_means_melt, colannotations, by.x = "Var2", by.y = 0, sort = F)
goi_expr_means_melt_annot_2 <- goi_expr_means_melt_annot %>% group_by(Var1, condition) %>% summarise(MeanVal = mean(value))
goi_expr_means <- dcast(goi_expr_means_melt_annot_2, formula = Var1 ~ condition , value.var = "MeanVal")
rownames(goi_expr_means) <- goi_expr_means$Var1
goi_expr_means$Var1 <- NULL
goi_expr_scaled_2 <- t(scale(t(goi_expr_means)))
goi_expr_df_2 <- as.data.frame(goi_expr_scaled_2)
goi_expr_df_2$Gene <- rownames(goi_expr_df_2)
goi_expr_df_2_all <- merge.data.frame(goi_expr_df_2, mydata, by = "Gene", all.x = T, sort = F)
goi_expr_melt_2 <- melt(data = goi_expr_df_2_all, id.vars = c("Gene","Cluster"))
goi_expr_melt_2$Timepoint <- gsub(x = goi_expr_melt_2$variable, pattern = "^.*NE", replacement = "")
goi_expr_melt_2$Condition <- substr(x = goi_expr_melt_2$variable, start = 1, stop = 2)
goi_expr_melt_2$Cluster <- factor(goi_expr_melt_2$Cluster, levels = unique(goi_expr_melt_2$Cluster))
goi_expr_melt_2_degannot <- merge.data.frame(x = goi_expr_melt_2,y = DEGs_d4, by = "Gene", all.x = T, sort = F)
goi_expr_melt_2_degannot$isDEGd4[is.na(goi_expr_melt_2_degannot$isDEGd4)] <- "notDEG"
goi_expr_melt_2_degannot <- merge.data.frame(x = goi_expr_melt_2_degannot,y = DEGs_d7, by = "Gene", all.x = T, sort = F)
goi_expr_melt_2_degannot$isDEGd7[is.na(goi_expr_melt_2_degannot$isDEGd7)] <- "notDEG"
goi_expr_melt_2_degannot$isDEG <- "noDEG"
goi_expr_melt_2_degannot$isDEG[which(goi_expr_melt_2_degannot$isDEGd4 == "isDEG" & goi_expr_melt_2_degannot$Timepoint == "d4")] <- "isDEG"
goi_expr_melt_2_degannot$isDEG[which(goi_expr_melt_2_degannot$isDEGd7 == "isDEG" & goi_expr_melt_2_degannot$Timepoint == "d7")] <- "isDEG"
goi_expr_melt_2_degannot$variable <- gsub(x = goi_expr_melt_2_degannot$variable, pattern = "NEd7", replacement = "")
goi_expr_melt_2_degannot$variable <- gsub(x = goi_expr_melt_2_degannot$variable, pattern = "NEd4", replacement = "")
library(scales)
# Load the original RdYlBu palette
original_palette <- rev(brewer.pal(11, "RdYlBu"))
# Create a darker version of the palette using muted()
darker_palette <- muted(original_palette, l = 30, c = 100)
goi_expr_melt_2_degannot$Timepoint <- gsub(x = goi_expr_melt_2_degannot$Timepoint,pattern = "d7", replacement = "Day7")
goi_expr_melt_2_degannot$Timepoint <- gsub(x = goi_expr_melt_2_degannot$Timepoint,pattern = "d4", replacement = "Day4")
png(filename = "GOI_dotplot.png", width = 6, height = 9, units = "in", res = 300)
ggplot(goi_expr_melt_2_degannot, mapping = aes(x = variable, y = Gene, fill = value, shape = Condition)) +
scale_shape_manual(values = c(21, 22)) +
geom_point(size = 2, show.legend = F) +
geom_point(data = subset.data.frame(goi_expr_melt_2_degannot, subset = isDEG == "isDEG"),
size = 4, colour = "black") +
scale_fill_distiller(palette = "RdYlBu", direction = -1) +
facet_grid(Cluster ~ Timepoint, scales = "free",
space = "free",
labeller = labeller(Cluster = label_wrap_gen(7),
Timepoint = label_wrap_gen(10))) +
labs(fill = "Zscore\nExpression") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
legend.title = element_text(hjust = 0.5),
panel.background = element_rect(fill = "white"),
strip.background = element_blank(),
panel.spacing = unit(1.3, "lines"),
strip.clip = "off",
panel.grid = element_line(colour = "lightgrey"), axis.title = element_blank())
dev.off()
# Volcano plots - 3D vs 2D ####
library(dplyr)
exprlist <- list(Day4 = myexpress$deseq$dge_res$`3DNEd4-2DNEd4`,
Day7 = myexpress$deseq$dge_res$`3DNEd7-2DNEd7`)
logfc.thr <- 0.6
adj.pval.thr <- 0.05
for(tpdata in names(exprlist)){
volcano_input <- exprlist[[tpdata]]
volcano_input <- subset(volcano_input, subset = !is.na(FDR))
volcano_input$FDR[which(volcano_input$FDR == 0)] <- .Machine$double.xmin
df <- data.frame(gene = rownames(volcano_input), pval = -log10(volcano_input$FDR),
lfc = volcano_input$logFC)
df <- mutate(df, color = case_when(df$lfc > logfc.thr & df$pval > -log10(adj.pval.thr) ~ "Overexpressed",
df$lfc < -logfc.thr & df$pval > -log10(adj.pval.thr) ~ "Underexpressed",
abs(df$lfc) < logfc.thr | df$pval < -log10(adj.pval.thr) ~ "NonSignificant"))
df <- df[order(df$lfc, decreasing = TRUE),]
max_x <- ceiling(max(abs(df$lfc)))
max_y <- ceiling(max(abs(na.omit(df$pval)))) + 10
vol <- ggplot(df, aes(x = lfc, y = pval, colour = color)) +
theme_bw(base_size = 16) +
theme(legend.position = "right") +
xlim(c(-max_x,max_x)) +
ylim(c(0,max_y)) +
xlab("log2 Fold Change") +
ylab(expression(-log[10]("adjusted p-value"))) +
geom_hline(yintercept = -log10(adj.pval.thr), colour = "darkgray")
if (logfc.thr > 0) {
vol <- vol +
geom_vline(xintercept = logfc.thr, colour = "darkgray") +
geom_vline(xintercept = -logfc.thr, colour = "darkgray")
}
vol <- vol +
geom_point(size = 1, alpha = 0.8, na.rm = T) +
scale_color_manual(name = NULL,
values = c(Overexpressed = "#CD4F39",
Underexpressed = "#0B5394",
NonSignificant = "darkgray")) +
theme(legend.position = "bottom", #c(0.2,0.9),
legend.key.height = unit(units = "cm", x = 0.2),
legend.key.width = unit(units = "cm", x = 0.2),
plot.title = element_text(hjust = 0.5)) +
ggtitle(paste0("3D vs 2D ", tpdata))
ggsave(filename = paste0(tpdata,"_volcano_DEGs.png"),
plot = vol,
width = 6, height = 6, dpi = 300)
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment