Commit 332879cf authored by Teresa Tavella's avatar Teresa Tavella
Browse files

annotate DARs

parent 45b7d40b
### open deseq results
library(GenomicRanges)
library(ggalluvial)
folder = 'results/05-filter_qual_bos_qual25/multibamc'
deseq_res <- read.table(paste0(folder,'/DESeq-HIGH-MID-allgene.txt'))
deseq_res$region <- rownames(deseq_res)
sorted_1not2 <- read.table('reference/Rank6_Signature1_not_2.sorted_unique.bed')
colnames(sorted_1not2) <- c('seqnames', 'start', 'end')
sorted_1not2$region <- paste0(sorted_1not2$seqnames,'_', sorted_1not2$start, '_',sorted_1not2$end)
sorted_1not2_gr <- makeGRangesFromDataFrame(sorted_1not2)
sorted_1not2$annot <- 'Old'
sorted_2not1 <- read.table('reference/Rank6_Signature2_not_1.sorted_unique.bed')
colnames(sorted_2not1) <- c('seqnames', 'start', 'end')
sorted_2not1$region <- paste0(sorted_2not1$seqnames,'_', sorted_2not1$start, '_',sorted_2not1$end)
sorted_2not1_gr <- makeGRangesFromDataFrame(sorted_2not1)
sorted_2not1$annot <- 'Young'
deseq_reso <- merge(deseq_res,sorted_1not2, by = 'region', all = T)
table(deseq_reso$annot)
deseq_resoy <- merge(deseq_reso,sorted_2not1, by = 'region', all = T)
deseq_resoy$annot <- ifelse(is.na(deseq_resoy$annot.x), 'Young',deseq_resoy$annot.x )
table(deseq_resoy$annot)
table(deseq_resoy$annot.x)
table(deseq_resoy$annot.y)
res <- subset(deseq_resoy, FDR <= 0.05)
res$direction <- ifelse(res$logFC > 0, 'Open', 'Close')
p_frip <- res[res$logFC >= 0 & res$FDR <= 0.05,]
cat('open ', dim(p_frip)[1], '\n')
n_frip <- res[res$logFC <= 0 & res$FDR <= 0.05,]
cat('close', dim(n_frip)[1], '\n')
#write.table(res, 'DESeq2_annotateby_rank6.txt')
write.table(res[, c(1:6)], 'DESeq2_Atac.txt', row.names = FALSE)
## write separately the 4 directions
open_y <- res[res$direction == 'Open' & res$annot == 'Young', ]
open_o <- res[res$direction == 'Open' & res$annot == 'Old', ]
close_y <- res[res$direction == 'Close' & res$annot == 'Young', ]
close_o <- res[res$direction == 'Close' & res$annot == 'Old', ]
# write.table(open_y[, c('seqnames.y', 'start.y', 'end.y', 'region','annot', 'direction')], 'Open_DARs_Y.txt', col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t')
# write.table(open_o[, c('seqnames.x', 'start.x', 'end.x', 'region', 'annot', 'direction')], 'Open_DARs_O.txt', col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t')
# write.table(close_y[, c('seqnames.y', 'start.y', 'end.y', 'region' ,'annot', 'direction')], 'Close_DARs_Y.txt', col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t')
# write.table(close_o[, c('seqnames.x', 'start.x', 'end.x', 'region', 'annot', 'direction')], 'Close_DARs_O.txt', col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t')
res$seqnames <- sapply( strsplit(res$region,'_'), "[", 1)
res$start <- sapply( strsplit(res$region,'_'), "[", 2)
res$end <- sapply( strsplit(res$region,'_'), "[", 3)
open <- res[res$direction == 'Open', ]
close <- res[res$direction == 'Close', ]
# write.table(open[, c('seqnames', 'start', 'end', 'region','annot', 'direction')], 'Open_DARs.bed', col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t')
# write.table(close[, c('seqnames', 'start', 'end', 'region','annot', 'direction')], 'Close_DARs.bed', col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t')
mydf <- res %>%
group_by(direction, annot) %>%
summarise(n = n(), .groups = "drop")
mydf <- as.data.frame(mydf)
mydf$direction <- ifelse(mydf$direction == 'Close', 'Open in Low', 'Open in High')
mydf$direction <- factor(mydf$direction, levels= c('Open in Low', 'Open in High'))
ggplot(mydf, aes(x = direction, y =n, fill = annot)) +
geom_bar(stat = "identity", position = "dodge", width = 0.7)+
scale_fill_manual(values = c('Old'= "red",'Young' = "blue"))+
theme_classic()+
theme(text = element_text(size=12),
axis.text.x = element_text(colour="black",size=12),
axis.text.y = element_text(colour="black",size=12),
axis.title.x = element_blank(),
legend.position = "bottom",
#legend.justification = 0.05,
panel.grid.major.x = element_line(colour = "grey80", linetype = "dashed"),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_line(colour = "grey80", linetype = "dashed"),
panel.grid.minor.x = element_blank()) + ylab('Number of DARs')+
ggtitle('Differential accessible regions')+theme(plot.title = element_text(size = 12, face = "bold"))+
guides(fill=guide_legend(title="Rank6 Signatures"))
cells <- c(1806,3167,3679,2697)
rnames <- c("O", "Y")
cnames <- c("UP-HIGH", "UP-MID")
mydata <- matrix(cells, nrow=2, ncol=2, byrow=TRUE, dimnames=list(rnames, cnames))
mydata
f <- fisher.test(mydata)
chi <- chisq.test(mydata)
mydata <- mydata[,c(2,1)]
fisher.test(mydata)
f <- data.frame(
Category = c(rep(1, 1806), rep(0, 3167), rep(1, 3679), rep(0, 2697)),
Group = c(rep(1, 1806 + 3679), rep(0, 3167 + 2697))
)
mycols <- c('Old'= "red",'Young' = "blue")
mydf_perc <- mydf %>%
group_by(direction) %>%
mutate(percentage = n / sum(n) * 100)
mydf_perc$annot <- factor(mydf$annot, levels= c('Young','Old'))
ggplot(mydf_perc, aes(x=direction,y=percentage, fill=annot)) +
geom_col(aes(fill = annot), width = .5, color = "black")+
#ggtitle("Percentage of cell cycle phases per cluster") +
scale_fill_manual(name = '', values = mycols) +
theme_classic() +
scale_y_continuous(name = '% of DARs', expand = c(0.01,0)) +
geom_flow(aes(alluvium = annot),min.y= -1, alpha= .9,
lty = 2, fill = "white", color = "black",
curve_type = "linear",
width = .5)+
theme(
legend.position = 'right',
plot.title = element_text(hjust = 0.5),
text = element_text(size = 16),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_blank(),
plot.margin = margin(t = 20, r = 10, b = 10, l = 10, unit = 'pt'),
axis.line = element_line(colour = "black",
size = 0.3, linetype = "solid"),
axis.text.x = element_text(size=12, color='black'), # angle=0, hjust=1,
axis.text.y = element_text(size=12, color='black'),
strip.placement = "outside",
) +
ggtitle('Analysis of Differential accessible regions: High vs Low')+theme(plot.title = element_text(size = 12, face = "bold"))+
guides(fill=guide_legend(title="Rank6 Signatures"))
p <- ggplot(mydf_perc, aes(x = direction, y = percentage, fill = annot)) +
geom_col(aes(fill = annot), width = 0.6, color = "black", size = 0.3) +
scale_fill_manual(name = '', values = mycols) +
theme_classic() +
scale_y_continuous(name = '% of DARs', expand = c(0.01, 0)) +
geom_flow(aes(alluvium = annot),
min.y = 1,
alpha = 0.9,
lty = 2,
fill = "white",
color = "black",
curve_type = "linear",
width = 0.6) +
ggtitle('Analysis of Differential Accessible Regions: HIGH vs MID') +
guides(fill = guide_legend(title = "Rank6 Signatures")) +
theme(
# Text sizes optimized for A4 publication
text = element_text(size = 20, family = "Arial"),
plot.title = element_text(size = 22, face = "bold", hjust = 0.5,
margin = margin(b = 20)),
axis.title.y = element_text(size = 22, face = "bold"),
axis.title.x = element_blank(),
axis.text.x = element_text(size = 22, color = 'black', face = "bold"),
axis.text.y = element_text(size = 22, color = 'black'),
# Legend formatting - positioned on top
legend.position = 'top',
legend.title = element_text(size = 18, face = "bold"),
legend.text = element_text(size = 16),
legend.key.size = unit(1, "cm"),
legend.key.width = unit(1.5, "cm"),
legend.margin = margin(b = 15),
legend.box.spacing = unit(0.5, "cm"),
# Panel and grid
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
# Margins and spacing - adjusted for top legend
plot.margin = margin(t = 15, r = 20, b = 20, l = 20, unit = 'pt'),
# Axis lines
axis.line = element_line(colour = "black", size = 0.5, linetype = "solid"),
axis.ticks = element_line(colour = "black", size = 0.5),
axis.ticks.length = unit(0.25, "cm"),
# Strip settings
strip.placement = "outside"
)
setwd('results/05-filter_qual_bos_qual25/multibamc')
ggsave("DARs_adjust.svg",
plot = p,
width = 8,
height = 10,
device = "svg")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment