# DEGs analysis customized for 1vs1 # 72h vs 24h library(edgeR) library(ggplot2) library(dplyr) library(openxlsx) library(ggrepel) library(biomaRt) cur.dir <- "DGE/" data = read.table("featureCounts_results.txt", header=T, row.names = 1) colnames(data) <- gsub(x = gsub(x = colnames(data), pattern = "_Aligned.sortedByCoord.out.bam", replacement = ""), pattern = "X.beegfs.scratch.ric.gentner.ric.gentner.HSC.Exp2155.results.03.aln.", replacement = "") data <- data[,6:ncol(data)] bcv <- 0.1 # i campioni in PCA sul totale sembrano molto vicini, quindi imposto bassa dispersione counts <- data comparisons <- list(T172h_vs_T124h_rd062 = c("TI72h_rd062","TI24h_rd062"), T172h_vs_T124h_202205 = c("TI72h_202205","TI24h_202205") ) normalized_data <- list() # filtering out very low counts genes comparisons_results <- list() for(i in names(comparisons)){ if(i == "T172h_vs_T124h_202205"){ bcv <- 0.3 }else{ bcv <- 0.1 } ids <- comparisons[[i]] counts.ss <- subset(x = counts, select = ids) rsums <- rowSums(counts.ss) counts.ss <- subset(counts.ss, subset = rsums > 31) # at least 30 counts as sum in the 2 y <- DGEList(counts=counts.ss, group = ids) TMM <- calcNormFactors(y, method = "TMM") normdata <- cpm(y = TMM, normalized.lib.sizes = TRUE) TMM <- exactTest(TMM, dispersion=bcv^2, pair = rev(ids)) table(p.adjust(TMM$table$PValue, method="BH")<0.05) DGEresults <- TMM$table DGEresults$FDR <- p.adjust(DGEresults$PValue, method="BH") DGEresults$isDEG <- DGEresults$FDR < 0.05 # retrieve counts from the counts table DGEresults_with_counts_tmp <- merge.data.frame(DGEresults, counts.ss, by = 0, all.x = T, sort = F) rownames(DGEresults_with_counts_tmp) <- DGEresults_with_counts_tmp$Row.names DGEresults_with_counts_tmp$Row.names <- NULL comparison.dir <- paste0(cur.dir, "/", i, "/") dir.create(comparison.dir, recursive = T, showWarnings = F) volcanodir <- paste0(comparison.dir, "/volcano/") dir.create(volcanodir, recursive = T,showWarnings = F) saveRDS(object = DGEresults_with_counts_tmp, file = paste0(comparison.dir,"edgeR-",ids[1],"-",ids[2],".rds")) write.table(x = DGEresults_with_counts_tmp, file = paste0(comparison.dir,"edgeR-",ids[1],"-",ids[2],".txt"), sep = "\t", row.names = T) comparisons_results[[i]] <- DGEresults_with_counts_tmp colnames(normdata) <- paste0(colnames(normdata), ".cpm") normalized_data[[i]] <- normdata # Volcano plot + goi in all comparisons #### logfc.thr <- 0.6 adj.pval.thr <- 0.05 ntop <- 15 df <- DGEresults_with_counts_tmp df$gene <- rownames(DGEresults_with_counts_tmp) df$FDR <- -log10(df$FDR) print(head(df)) df <- mutate(df, color = case_when(df$logFC > logfc.thr & df$FDR > -log10(adj.pval.thr) ~ "Overexpressed", df$logFC < -logfc.thr & df$FDR > -log10(adj.pval.thr) ~ "Underexpressed", abs(df$logFC) < logfc.thr | df$FDR < -log10(adj.pval.thr) ~ "NonSignificant") ) df <- df[order(df$logFC, decreasing = TRUE),] under <- tail(subset(x = df, subset = color == "Underexpressed"),ntop) over <- head(subset(x = df, subset = color == "Overexpressed"),ntop) #genesofinterest <- subset(x = df, subset = color %in% c("Overexpressed","Underexpressed") & gene %in% goi) hightext <- rbind(under, over) max_x <- ceiling(max(abs(df$logFC))) max_y <- ceiling(max(abs(na.omit(df$FDR)))) vol <- ggplot(df, aes(x = logFC, y = FDR, colour = color)) + #ggtitle(paste(c1, "vs.", c2, sep = " ")) + theme_bw(base_size = 12) + theme(legend.position = "right") + xlim(c(-max_x,max_x)) + ylim(c(0,max_y)) + #xlab(paste("log2(", paste(c1, c2, sep = " / "), ")", sep = "")) + xlab("log2 Fold Change") + ylab(expression(-log[10]("adjusted p-value"))) + labs(title = i) + geom_hline(yintercept = -log10(adj.pval.thr), colour = "darkgray") if (logfc.thr > 0) { vol <- vol + geom_vline(xintercept = logfc.thr, colour = "darkgray") + geom_vline(xintercept = -logfc.thr, colour = "darkgray") } vol <- vol + geom_point(size = 1, alpha = 0.8, na.rm = T) + scale_color_manual(name = "Expression", values = c(Overexpressed = "#CD4F39", Underexpressed = "#0B5394", NonSignificant = "darkgray")) + geom_label_repel(data = hightext, aes(label = gene), show.legend = FALSE) + #geom_label_repel(data = genesofinterest, aes(label = gene, fill = color), show.legend = FALSE) + theme(legend.position = c(0.5,0.9), plot.title = element_text(hjust = 0.5)) #scale_y_continuous(trans = "log2") ggsave(filename = paste(volcanodir, paste(i, "volcano.png", sep = "-"), sep = "/"), plot = vol, width = 10, height = 9, dpi = 600) } significant <- list() for (k in names(comparisons_results)) { significant[[paste0(k,"_UP")]] <- subset(comparisons_results[[k]], subset = isDEG == T & logFC > 0) significant[[paste0(k,"_DOWN")]] <- subset(comparisons_results[[k]], subset = isDEG == T & logFC < 0) significant[[paste0(k,"_UP")]] <- rownames(significant[[paste0(k,"_UP")]]) significant[[paste0(k,"_DOWN")]] <- rownames(significant[[paste0(k,"_DOWN")]]) } # Connect to the database ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl") # Define the attributes to retrieve attributes = c("external_gene_name", "description") # Retrieve the information gene_info = getBM(attributes, mart = ensembl) comparisons_results[["intersection_sign_concordant_UP"]] <- as.data.frame(intersect(significant$T172h_vs_T124h_rd062_UP, significant$T172h_vs_T124h_202205_UP)) colnames(comparisons_results[["intersection_sign_concordant_UP"]]) <- "Concordant_UP" rownames(comparisons_results[["intersection_sign_concordant_UP"]]) <- comparisons_results[["intersection_sign_concordant_UP"]]$Concordant_UP tmpdf <- comparisons_results[["intersection_sign_concordant_UP"]] tmpdf.annot <- merge(tmpdf, gene_info, by.x = "Concordant_UP", by.y = "external_gene_name", all.x = T, sort = F) tmpdf.annot <- merge(tmpdf.annot, comparisons_results$T172h_vs_T124h_rd062[,c(1,6:7)], by.x = "Concordant_UP", by.y = 0, all.x = T, sort = F) colnames(tmpdf.annot)[3] <- "logFC_rd062" tmpdf.annot <- merge(tmpdf.annot, comparisons_results$T172h_vs_T124h_202205[,c(1,6:7)], by.x = "Concordant_UP", by.y = 0, all.x = T, sort = F) colnames(tmpdf.annot)[6] <- "logFC_202205" tmpdf.annot <- merge.data.frame(tmpdf.annot, normalized_data$T172h_vs_T124h_rd062, by.x = "Concordant_UP", by.y = 0, all.x = T, sort = F) tmpdf.annot <- merge.data.frame(tmpdf.annot, normalized_data$T172h_vs_T124h_202205, by.x = "Concordant_UP", by.y = 0, all.x = T, sort = F) comparisons_results[["intersection_sign_concordant_UP"]] <- tmpdf.annot comparisons_results[["intersection_sign_concordant_DW"]] <- as.data.frame(intersect(significant$T172h_vs_T124h_rd062_DOWN, significant$T172h_vs_T124h_202205_DOWN)) colnames(comparisons_results[["intersection_sign_concordant_DW"]]) <- "Concordant_DOWN" rownames(comparisons_results[["intersection_sign_concordant_DW"]]) <- comparisons_results[["intersection_sign_concordant_DW"]]$Concordant_DOWN tmpdf <- comparisons_results[["intersection_sign_concordant_DW"]] tmpdf.annot <- merge(tmpdf, gene_info, by.x = "Concordant_DOWN", by.y = "external_gene_name", all.x = T, sort = F) tmpdf.annot <- merge(tmpdf.annot, comparisons_results$T172h_vs_T124h_rd062[,c(1,6:7)], by.x = "Concordant_DOWN", by.y = 0, all.x = T, sort = F) colnames(tmpdf.annot)[3] <- "logFC_rd062" tmpdf.annot <- merge(tmpdf.annot, comparisons_results$T172h_vs_T124h_202205[,c(1,6:7)], by.x = "Concordant_DOWN", by.y = 0, all.x = T, sort = F) colnames(tmpdf.annot)[6] <- "logFC_202205" tmpdf.annot <- merge.data.frame(tmpdf.annot, normalized_data$T172h_vs_T124h_rd062, by.x = "Concordant_DOWN", by.y = 0, all.x = T, sort = F) tmpdf.annot <- merge.data.frame(tmpdf.annot, normalized_data$T172h_vs_T124h_202205, by.x = "Concordant_DOWN", by.y = 0, all.x = T, sort = F) comparisons_results[["intersection_sign_concordant_DW"]] <- tmpdf.annot saveRDS(comparisons_results, file = "Comparisons_exp1_1vs1.rds") write.xlsx(comparisons_results, file = "Comparisons_exp1_1vs1.xlsx", asTable = T, rowNames = T, overwrite = T, firstRow = T) saveRDS(normalized_data, file = "SizeFactors_norm_cpm_exp1_1vs1.rds") normalized_data$T172h_vs_T124h_rd062 <- as.data.frame(normalized_data$T172h_vs_T124h_rd062) normalized_data$T172h_vs_T124h_202205 <- as.data.frame(normalized_data$T172h_vs_T124h_202205) write.xlsx(normalized_data, file = "SizeFactors_norm_cpm_exp1_1vs1.xlsx", asTable = T, rowNames = T, overwrite = T, firstRow = T)