library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(stringr)
library(e1071)
library(openxlsx)
library(scales)
library(reshape2)

#' Named colour palette for single-base substitutions.
#'
#' Each colour is named by the REF and ALT base pasted together (e.g. "CT");
#' point deletions are named "<base>*".
#'
#' @param del If TRUE, the four point-deletion colours are appended.
#' @param alpha Opacity applied to every colour (1 = fully opaque).
#' @return A named character vector of colours: 12 entries, or 16 when
#'   `del` is TRUE.
snv_colors2 <- function(del = FALSE, alpha = 1) {
  nuc <- c("A", "C", "G", "T")

  # Take shades 2-5 of a 5-step Brewer palette and apply the requested
  # opacity. The call is namespaced because the `alpha` argument has the same
  # name as the scales function.
  shade <- function(palette, labels) {
    cols <- scales::alpha(brewer.pal(5, palette)[2:5], alpha)
    names(cols) <- labels
    cols
  }

  # Point deletions
  dels <- shade("Greys", paste0(nuc, "*"))
  # Transitions
  ts <- shade("Blues", c("CT", "GA", "AG", "TC"))
  # Transversions
  tv1 <- shade("YlGn", c("AC", "TG", "AT", "TA"))
  tv2 <- shade("OrRd", c("CA", "GT", "CG", "GC"))

  var_cols <- c(ts, tv1, tv2)
  if (del) {
    var_cols <- c(var_cols, dels)
  }
  var_cols
}

#####################
### Variant Plots ###
#####################

#' Write per-sample variant summary tables (xlsx) and bar plots (pdf).
#'
#' Rows are kept when CHROM contains "chr" and is neither "chrY" nor "chrM".
#' Files written to `out_dir`, each named `<plot_prefix><suffix>`:
#' _VariantCounts.xlsx/.pdf, _VariantClassification.xlsx, _VariantType.xlsx,
#' _VariantTypes.pdf, _VariantTypesPerc.pdf, _SNVcounts.xlsx/.pdf and
#' _SNVcountsPerc.pdf.
#'
#' @param full.t Data frame of variants with columns CHROM, Sample, REF, ALT
#'   and the column named by `fill_by`.
#' @param out_dir Output directory; created if it does not exist.
#' @param plot_prefix Prefix of every output file name.
#' @param fill_by Name (single string) of the column used for grouping and
#'   for the facet columns of every plot.
#' @return Called for its side effects (files on disk).
plot_variants <- function(full.t, out_dir, plot_prefix, fill_by) {
  stopifnot(is.character(fill_by), length(fill_by) == 1L)
  dir.create(path = out_dir, showWarnings = FALSE, recursive = TRUE)

  # Path of an output file inside out_dir.
  out_file <- function(suffix) {
    file.path(out_dir, paste0(plot_prefix, suffix))
  }
  # Save a plot as a square pdf.
  save_pdf <- function(plot, suffix, size) {
    ggsave(filename = out_file(suffix), plot = plot,
           width = size, height = size)
  }

  # Shared by every plot: one facet column per value of `fill_by`.
  facet_by <- facet_grid(cols = vars(.data[[fill_by]]),
                         scales = "free_x", space = "free")

  # Chromosome filter shared by all summaries below.
  chrom_filtered <- full.t %>%
    filter(grepl("chr", CHROM)) %>%
    filter(!CHROM %in% c("chrY", "chrM"))

  ## Per-sample variant counts
  tt <- chrom_filtered %>%
    group_by(Sample, !!!syms(fill_by)) %>%
    summarise(Count = n())
  write.xlsx(x = list("NumVariants" = tt),
             file = out_file("_VariantCounts.xlsx"))

  p <- ggplot(tt, aes(x = Sample, y = Count,
                      fill = .data[[fill_by]], color = .data[[fill_by]])) +
    theme_bw(base_size = 12) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
          legend.position = "none") +
    geom_bar(stat = "identity", alpha = 0.6) +
    scale_y_continuous(labels = scales::comma, n.breaks = 8) +
    xlab("") +
    facet_by
  save_pdf(p, "_VariantCounts.pdf", 6)

  ## Variant classes (SNV / DEL / INS / Other) per REF/ALT pair
  tt <- chrom_filtered %>%
    group_by(Sample, !!!syms(fill_by), REF, ALT) %>%
    summarise(Count = n())
  tt <- mutate(
    tt,
    TYPE = case_when(
      nchar(REF) == 1 & nchar(ALT) == 1 & ALT != "*" ~ "SNV",
      nchar(REF) == 1 & nchar(ALT) == 1 & ALT == "*" ~ "DEL",
      nchar(REF) > nchar(ALT) ~ "DEL",
      nchar(REF) < nchar(ALT) ~ "INS",
      TRUE ~ "Other"
    )
  )
  write.xlsx(x = list("VariantClass" = tt),
             file = out_file("_VariantClassification.xlsx"))

  # Totals per type, plus each type's share within its sample.
  tt_type <- tt %>%
    group_by(Sample, !!!syms(fill_by), TYPE) %>%
    summarise(SumCount = sum(Count)) %>%
    arrange(desc(SumCount)) %>%
    group_by(Sample, !!!syms(fill_by)) %>%
    mutate(CountPerc = SumCount / sum(SumCount))
  write.xlsx(x = list("VariantType" = tt_type),
             file = out_file("_VariantType.xlsx"))

  # Stacked bars of variant types; only the y variable, label and scale differ
  # between the count and the percentage version.
  type_bars <- function(y_var, y_label, y_scale) {
    ggplot(tt_type, aes(x = Sample, y = .data[[y_var]],
                        fill = TYPE, color = TYPE)) +
      theme_bw(base_size = 12) +
      theme(axis.text.x = element_text(angle = 30, hjust = 1),
            legend.position = "top") +
      guides(fill = guide_legend(ncol = 6)) +
      xlab("") +
      ylab(y_label) +
      scale_fill_brewer(palette = "Set1", name = "") +
      scale_color_brewer(palette = "Set1", name = "") +
      geom_bar(stat = "identity", position = "stack", alpha = 0.6) +
      y_scale +
      facet_by
  }

  p <- type_bars("SumCount", "Count",
                 scale_y_continuous(labels = scales::comma, n.breaks = 8))
  save_pdf(p, "_VariantTypes.pdf", 6)

  # CountPerc is a fraction of the sample total, so the axis is labelled as a
  # percentage rather than "Count".
  p <- type_bars("CountPerc", "Percentage",
                 scale_y_continuous(
                   labels = scales::percent_format(accuracy = 2),
                   n.breaks = 10
                 ))
  save_pdf(p, "_VariantTypesPerc.pdf", 6)

  ## Single-base REF/ALT pairs
  # NOTE(review): rows with ALT == "*" pass this filter although they are
  # classified as "DEL" above; they are counted in the table and in CountPerc
  # and become NA in the plots because the palette is built with del = FALSE.
  # Confirm whether they should be excluded here.
  tt <- chrom_filtered %>%
    filter(nchar(REF) == 1 & nchar(ALT) == 1) %>%
    group_by(Sample, !!!syms(fill_by), REF, ALT) %>%
    summarise(Count = n()) %>%
    group_by(Sample, !!!syms(fill_by)) %>%
    mutate(CountPerc = Count / sum(Count))
  tt$Variant <- paste0(tt$REF, tt$ALT)
  tt$Variant <- factor(tt$Variant, levels = sort(unique(tt$Variant)))
  write.xlsx(x = list("SNV" = tt), file = out_file("_SNVcounts.xlsx"))

  # Re-level so that the stacking order follows the palette order.
  snv_pal <- snv_colors2(del = FALSE)
  tt$Variant <- factor(tt$Variant, levels = rev(names(snv_pal)))

  # Stacked bars of substitution classes; the y label is set to the column
  # name, which is what the plot shows when no label is given.
  snv_bars <- function(y_var, y_scale) {
    ggplot(tt, aes(x = Sample, y = .data[[y_var]],
                   fill = Variant, color = Variant)) +
      theme_bw(base_size = 12) +
      theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
      geom_bar(stat = "identity", alpha = 0.6) +
      scale_fill_manual(values = snv_pal) +
      scale_color_manual(values = snv_pal) +
      xlab("") +
      ylab(y_var) +
      y_scale +
      facet_by
  }

  p2 <- snv_bars("Count",
                 scale_y_continuous(labels = scales::comma, n.breaks = 8))
  save_pdf(p2, "_SNVcounts.pdf", 7)

  p2p <- snv_bars("CountPerc",
                  scale_y_continuous(
                    labels = scales::percent_format(accuracy = 2),
                    n.breaks = 10
                  ))
  save_pdf(p2p, "_SNVcountsPerc.pdf", 7)
}

###############
### General ###
###############

# Directory that input workbooks are read from and results are written to
out_dir <- "."
# Build a path inside out_dir.
in_out_dir <- function(file_name) {
  paste(out_dir, file_name, sep = "/")
}

# Variant table from the WES results workbook, and the per-sample plots.
full.t.nomulti.noGerm <- read.xlsx(
  xlsxFile = in_out_dir("WES_results.xlsx"),
  sheet = "Full_NoMulti_noGerm"
)
plot_variants(
  full.t = full.t.nomulti.noGerm,
  out_dir = out_dir,
  plot_prefix = "WES_results",
  fill_by = "Donor"
)

# Number of variants per Donor / Group for each snpEff impact class.
impact_levels <- c("HIGH", "MODERATE", "LOW")
impact_rows <- filter(full.t.nomulti.noGerm, snpEff_Impact %in% impact_levels)
snpeff_df <- summarise(
  group_by(impact_rows, Donor, Group, snpEff_Impact),
  Count = n()
)
# Cast to wide and melt back to long, so that every Donor / Group row carries
# an entry for each impact class found in the data.
snpeff_wide <- reshape2::dcast(
  snpeff_df,
  Donor + Group ~ snpEff_Impact,
  value.var = "Count"
)
snpeff_df <- melt(snpeff_wide, id.vars = c("Donor", "Group"))
snpeff_df$variable <- factor(snpeff_df$variable, levels = impact_levels)
write.xlsx(
  x = list("VarClassification" = snpeff_df),
  file = in_out_dir("WES_results_Variant_Classification.xlsx")
)

p <- ggplot(
  snpeff_df,
  aes(x = Group, y = value, fill = variable, color = variable)
) +
  geom_bar(stat = "identity", alpha = 0.6) +
  facet_grid(. ~ Donor) +
  scale_fill_brewer(palette = "Set1", name = "Classification") +
  scale_color_brewer(palette = "Set1", name = "Classification") +
  labs(x = "", y = "Count", title = "Variant Classification") +
  theme_bw(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave(
  filename = in_out_dir("WES_results_Variant_Classification.pdf"),
  plot = p,
  width = 8,
  height = 6
)

##################
### Gene Lists ###
##################

# One point per row of the gene-list sheet: sample on x, gene name on y,
# point size from VAF and colour from TYPE, faceted by GL (rows) and Group
# (columns).
full.circos <- read.xlsx(
  xlsxFile = in_out_dir("WES_results_GeneLists.xlsx"),
  sheet = "GeneLists_Variants"
)

p <- ggplot(
  full.circos,
  aes(x = Sample, y = snpEff_GeneName, size = VAF, color = TYPE)
) +
  geom_jitter(height = 0, alpha = 0.5) +
  facet_grid(GL ~ Group, scales = "free", space = "free") +
  scale_color_brewer(palette = "Set1", name = "Type") +
  # Enlarge the colour legend keys independently of the VAF size scale.
  guides(color = guide_legend(override.aes = list(size = 5))) +
  labs(x = "", y = "") +
  theme_bw(base_size = 10) +
  theme(
    strip.text.y = element_text(angle = 0, hjust = 0, size = 12),
    strip.background.y = element_rect(fill = "white", color = "white")
  )
ggsave(
  filename = in_out_dir("WES_results_GeneLists.pdf"),
  plot = p,
  width = 10,
  height = 12
)