### open deseq results library(GenomicRanges) library(ggalluvial) folder = 'results/05-filter_qual_bos_qual25/multibamc' deseq_res <- read.table(paste0(folder,'/DESeq-HIGH-MID-allgene.txt')) deseq_res$region <- rownames(deseq_res) sorted_1not2 <- read.table('reference/Rank6_Signature1_not_2.sorted_unique.bed') colnames(sorted_1not2) <- c('seqnames', 'start', 'end') sorted_1not2$region <- paste0(sorted_1not2$seqnames,'_', sorted_1not2$start, '_',sorted_1not2$end) sorted_1not2_gr <- makeGRangesFromDataFrame(sorted_1not2) sorted_1not2$annot <- 'Old' sorted_2not1 <- read.table('reference/Rank6_Signature2_not_1.sorted_unique.bed') colnames(sorted_2not1) <- c('seqnames', 'start', 'end') sorted_2not1$region <- paste0(sorted_2not1$seqnames,'_', sorted_2not1$start, '_',sorted_2not1$end) sorted_2not1_gr <- makeGRangesFromDataFrame(sorted_2not1) sorted_2not1$annot <- 'Young' deseq_reso <- merge(deseq_res,sorted_1not2, by = 'region', all = T) table(deseq_reso$annot) deseq_resoy <- merge(deseq_reso,sorted_2not1, by = 'region', all = T) deseq_resoy$annot <- ifelse(is.na(deseq_resoy$annot.x), 'Young',deseq_resoy$annot.x ) table(deseq_resoy$annot) table(deseq_resoy$annot.x) table(deseq_resoy$annot.y) res <- subset(deseq_resoy, FDR <= 0.05) res$direction <- ifelse(res$logFC > 0, 'Open', 'Close') p_frip <- res[res$logFC >= 0 & res$FDR <= 0.05,] cat('open ', dim(p_frip)[1], '\n') n_frip <- res[res$logFC <= 0 & res$FDR <= 0.05,] cat('close', dim(n_frip)[1], '\n') #write.table(res, 'DESeq2_annotateby_rank6.txt') write.table(res[, c(1:6)], 'DESeq2_Atac.txt', row.names = FALSE) ## write separately the 4 directions open_y <- res[res$direction == 'Open' & res$annot == 'Young', ] open_o <- res[res$direction == 'Open' & res$annot == 'Old', ] close_y <- res[res$direction == 'Close' & res$annot == 'Young', ] close_o <- res[res$direction == 'Close' & res$annot == 'Old', ] # write.table(open_y[, c('seqnames.y', 'start.y', 'end.y', 'region','annot', 'direction')], 'Open_DARs_Y.txt', col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t') # write.table(open_o[, c('seqnames.x', 'start.x', 'end.x', 'region', 'annot', 'direction')], 'Open_DARs_O.txt', col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t') # write.table(close_y[, c('seqnames.y', 'start.y', 'end.y', 'region' ,'annot', 'direction')], 'Close_DARs_Y.txt', col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t') # write.table(close_o[, c('seqnames.x', 'start.x', 'end.x', 'region', 'annot', 'direction')], 'Close_DARs_O.txt', col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t') res$seqnames <- sapply( strsplit(res$region,'_'), "[", 1) res$start <- sapply( strsplit(res$region,'_'), "[", 2) res$end <- sapply( strsplit(res$region,'_'), "[", 3) open <- res[res$direction == 'Open', ] close <- res[res$direction == 'Close', ] # write.table(open[, c('seqnames', 'start', 'end', 'region','annot', 'direction')], 'Open_DARs.bed', col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t') # write.table(close[, c('seqnames', 'start', 'end', 'region','annot', 'direction')], 'Close_DARs.bed', col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t') mydf <- res %>% group_by(direction, annot) %>% summarise(n = n(), .groups = "drop") mydf <- as.data.frame(mydf) mydf$direction <- ifelse(mydf$direction == 'Close', 'Open in Low', 'Open in High') mydf$direction <- factor(mydf$direction, levels= c('Open in Low', 'Open in High')) ggplot(mydf, aes(x = direction, y =n, fill = annot)) + geom_bar(stat = "identity", position = "dodge", width = 0.7)+ scale_fill_manual(values = c('Old'= "red",'Young' = "blue"))+ theme_classic()+ theme(text = element_text(size=12), axis.text.x = element_text(colour="black",size=12), axis.text.y = element_text(colour="black",size=12), axis.title.x = element_blank(), legend.position = "bottom", #legend.justification = 0.05, panel.grid.major.x = element_line(colour = "grey80", linetype = "dashed"), panel.grid.minor.y = element_blank(), panel.grid.major.y = element_line(colour = "grey80", linetype = "dashed"), panel.grid.minor.x = element_blank()) + ylab('Number of DARs')+ ggtitle('Differential accessible regions')+theme(plot.title = element_text(size = 12, face = "bold"))+ guides(fill=guide_legend(title="Rank6 Signatures")) cells <- c(1806,3167,3679,2697) rnames <- c("O", "Y") cnames <- c("UP-HIGH", "UP-MID") mydata <- matrix(cells, nrow=2, ncol=2, byrow=TRUE, dimnames=list(rnames, cnames)) mydata f <- fisher.test(mydata) chi <- chisq.test(mydata) mydata <- mydata[,c(2,1)] fisher.test(mydata) f <- data.frame( Category = c(rep(1, 1806), rep(0, 3167), rep(1, 3679), rep(0, 2697)), Group = c(rep(1, 1806 + 3679), rep(0, 3167 + 2697)) ) mycols <- c('Old'= "red",'Young' = "blue") mydf_perc <- mydf %>% group_by(direction) %>% mutate(percentage = n / sum(n) * 100) mydf_perc$annot <- factor(mydf$annot, levels= c('Young','Old')) ggplot(mydf_perc, aes(x=direction,y=percentage, fill=annot)) + geom_col(aes(fill = annot), width = .5, color = "black")+ #ggtitle("Percentage of cell cycle phases per cluster") + scale_fill_manual(name = '', values = mycols) + theme_classic() + scale_y_continuous(name = '% of DARs', expand = c(0.01,0)) + geom_flow(aes(alluvium = annot),min.y= -1, alpha= .9, lty = 2, fill = "white", color = "black", curve_type = "linear", width = .5)+ theme( legend.position = 'right', plot.title = element_text(hjust = 0.5), text = element_text(size = 16), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.title.x = element_blank(), plot.margin = margin(t = 20, r = 10, b = 10, l = 10, unit = 'pt'), axis.line = element_line(colour = "black", size = 0.3, linetype = "solid"), axis.text.x = element_text(size=12, color='black'), # angle=0, hjust=1, axis.text.y = element_text(size=12, color='black'), strip.placement = "outside", ) + ggtitle('Analysis of Differential accessible regions: High vs Low')+theme(plot.title = element_text(size = 12, face = "bold"))+ guides(fill=guide_legend(title="Rank6 Signatures")) p <- ggplot(mydf_perc, aes(x = direction, y = percentage, fill = annot)) + geom_col(aes(fill = annot), width = 0.6, color = "black", size = 0.3) + scale_fill_manual(name = '', values = mycols) + theme_classic() + scale_y_continuous(name = '% of DARs', expand = c(0.01, 0)) + geom_flow(aes(alluvium = annot), min.y = 1, alpha = 0.9, lty = 2, fill = "white", color = "black", curve_type = "linear", width = 0.6) + ggtitle('Analysis of Differential Accessible Regions: HIGH vs MID') + guides(fill = guide_legend(title = "Rank6 Signatures")) + theme( # Text sizes optimized for A4 publication text = element_text(size = 20, family = "Arial"), plot.title = element_text(size = 22, face = "bold", hjust = 0.5, margin = margin(b = 20)), axis.title.y = element_text(size = 22, face = "bold"), axis.title.x = element_blank(), axis.text.x = element_text(size = 22, color = 'black', face = "bold"), axis.text.y = element_text(size = 22, color = 'black'), # Legend formatting - positioned on top legend.position = 'top', legend.title = element_text(size = 18, face = "bold"), legend.text = element_text(size = 16), legend.key.size = unit(1, "cm"), legend.key.width = unit(1.5, "cm"), legend.margin = margin(b = 15), legend.box.spacing = unit(0.5, "cm"), # Panel and grid panel.grid.major = element_blank(), panel.grid.minor = element_blank(), # Margins and spacing - adjusted for top legend plot.margin = margin(t = 15, r = 20, b = 20, l = 20, unit = 'pt'), # Axis lines axis.line = element_line(colour = "black", size = 0.5, linetype = "solid"), axis.ticks = element_line(colour = "black", size = 0.5), axis.ticks.length = unit(0.25, "cm"), # Strip settings strip.placement = "outside" ) setwd('results/05-filter_qual_bos_qual25/multibamc') ggsave("DARs_adjust.svg", plot = p, width = 8, height = 10, device = "svg")