Commit 0cb7bdea authored by Matteo Barcella's avatar Matteo Barcella
Browse files

Script barcoding analysis

parent 6d9d4857
This diff is collapsed.
# Run Analysis (MM samples + Expanded)
library(vegan)
library(dplyr)
source(file = "../ChartingBCs_Light.R")
library(RColorBrewer)
library(ggrepel)
library(openxlsx)
paperdata <- "output/"
dir.create(path = paperdata)
rootdir <- "./"
bseqdir <- paste0(rootdir,"Input/") # output from barseq
samplesheets <- list(MinPct = paste0(rootdir,"Input/DP-like_EX-U_samplesheet.txt"))
# SampleIDs
# MMs : MM1 MM3 MM4 (Minimally manipulated - DP-like group)
# Ehigh: E09 E10 E11 E12 E13 E14
# Elow: E16 E17 E18 E19 E20 E21
MM = c("MM1","MM3","MM4")
Ehigh = c("E09", "E10", "E11", "E12", "E13", "E14")
Elow = c( "E16", "E17", "E18", "E19", "E20", "E21")
SampleIDs <- list(MM = MM,
Ehigh = Ehigh,
Elow = Elow,
MM_E = c(MM,Ehigh,Elow),
MM_Elow = c(MM,Elow),
MM_Ehigh = c(MM,Ehigh),
)
#### Basic analysis ####
for(setting in c("MinPct"))
dir.create(path = setting, showWarnings = F)
if(setting == "Auto"){
custom.cut.param <- FALSE
}else{
custom.cut.param <- TRUE
}
if(setting == "MinCPM"){
MinlogCPMFilter.param <- TRUE
}else{
MinlogCPMFilter.param <- FALSE
}
for (SampleGroup in names(SampleIDs)) {
chartingBCs_light(bseq.fold = bseqdir, samplesheet = samplesheets[[setting]],
custom.cut = custom.cut.param, do.filter = T,
method = setting,
bcodes2reassign = NULL,
removeMaxN = TRUE,
MinlogCPMFilter = MinlogCPMFilter.param,
sampleids = SampleIDs[[SampleGroup]], ed = 2, mc = 3,
projectid = paste0(setting,"_",SampleGroup,"_MaxN_removal"),
outfolder = paste(setting,paste0(SampleGroup,"_MaxN_removal","_seqtk"),sep = "/"),
counts_vector = c(5,20,50,100,200,500,5000,20000),
pcts_vector = c(0.001,0.002,0.005,0.01,0.02,0.1,0.2,0.5,0.75,1,2))
}
}
# Remove barcodes that are shared between MM vs Es (Elow + Ehigh)
# loading dataframe with all counts passing threshold (0.05) including all replicates and after removal of
# barcodes that are present in more than N replicates
MM_E_replicates <- readRDS(file = paste0(rootdir,"MinPct/MM_E_MaxN_removal_seqtk/MinPct_MM_E_MaxN_removal_objects.rds"))
# Findout shared barcodes between MM and Es and assign to the higher replicate
# data input: MM_E_replicates$sharingbygroup$MM_vs_E_highdose
shared_MM_and_Es_with_replicates <- unique(c(rownames(MM_E_replicates$sharingbygroup$MM_vs_E_highdose),
rownames(MM_E_replicates$sharingbygroup$MM_vs_E_lowdose),
rownames(MM_E_replicates$sharingbygroup$MM_vs_E_highdose_vs_E_lowdose)))
saveRDS(object = shared_MM_and_Es_with_replicates, paste0(paperdata,"/shared_MM_and_Es_with_replicates_step01.rds"))
shared_MM_and_Es_with_replicates_mt <- MM_E_replicates$counts_abund_matrices$logCPM[shared_MM_and_Es_with_replicates,c(MM,Ehigh,Elow)]
shared_MM_and_Es_with_replicates_mt <- as.matrix(arrange_all(shared_MM_and_Es_with_replicates_mt,desc))
shared_MM_and_Es_with_replicates_mt_pcts <- MM_E_replicates$counts_abund_matrices$pct_abund[shared_MM_and_Es_with_replicates,c(MM,Ehigh,Elow)]
shared_MM_and_Es_with_replicates_mt_pcts <- as.matrix(arrange_all(shared_MM_and_Es_with_replicates_mt_pcts,desc))
# retain barcodes that are shared within dataset ####
MM_obj <- readRDS("MinPct/MM_MaxN_removal_seqtk/MinPct_MM_MaxN_removal_objects.rds")
Ehigh_obj <- readRDS("MinPct/Ehigh_MaxN_removal_seqtk/MinPct_Ehigh_MaxN_removal_objects.rds")
Elow_obj <- readRDS("MinPct/Elow_MaxN_removal_seqtk/MinPct_Elow_MaxN_removal_objects.rds")
shared_within_MM <- rownames(subset(MM_obj$aggregationbygroup_melt, subset = MM > 1))
shared_within_Ehigh <- rownames(subset(Ehigh_obj$aggregationbygroup_melt, subset = E_highdose > 1))
shared_within_Elow <- rownames(subset(Elow_obj$aggregationbygroup_melt, subset = E_lowdose > 1))
writeLines(text = shared_within_MM, con = paste0(paperdata,"/Shared_barcodes_within_MM.txt"))
writeLines(text = shared_within_Ehigh, con = paste0(paperdata,"/Shared_barcodes_within_Ehigh.txt")
writeLines(text = shared_within_Elow, con = paste0(paperdata,"/Shared_barcodes_within_Elow.txt")
dim(shared_MM_and_Es_with_replicates_mt_pcts)
index_to_retain <- which(!rownames(shared_MM_and_Es_with_replicates_mt_pcts) %in% unique(c(shared_within_MM, shared_within_Ehigh, shared_within_Elow)))
shared_MM_and_Es_with_replicates_mt_pcts <- shared_MM_and_Es_with_replicates_mt_pcts[index_to_retain,]
shared_MM_and_Es_with_replicates_mt <- shared_MM_and_Es_with_replicates_mt[index_to_retain,]
sds <- apply(shared_MM_and_Es_with_replicates_mt_pcts, MARGIN = 1, FUN = function(x) {sd(x[x > 0])})
maxs <- apply(shared_MM_and_Es_with_replicates_mt_pcts, MARGIN = 1, FUN = max)
second <- apply(shared_MM_and_Es_with_replicates_mt_pcts, MARGIN = 1, FUN = function(x) {sort(x, decreasing = T)[2]})
means <- apply(shared_MM_and_Es_with_replicates_mt_pcts, MARGIN = 1, FUN = mean)
ratio_max_to_2nd <- apply(shared_MM_and_Es_with_replicates_mt_pcts,
MARGIN = 1, FUN = max) / apply(shared_MM_and_Es_with_replicates_mt_pcts,
MARGIN = 1, FUN = function(x) {sort(x, decreasing = T)[2]})
mydf_weightedratio <- as.data.frame(ratio_max_to_2nd)
colnames(mydf_weightedratio) <- "ratioMax2Second"
mydf_weightedratio$weighted <- mydf_weightedratio$ratioMax2Second * means
mydf_weightedratio$logged <- log10(mydf_weightedratio$weighted)
reassignment_rule <- ifelse(mydf_weightedratio$logged > -1, 1, 0)
names(reassignment_rule) <- rownames(mydf_weightedratio)
saveRDS(object = ratio_max_to_2nd, file = paste0(paperdata,"/ratio_max_to_2nd_pcts_nofilt_within.rds"))
png(paste0(paperdata,"Baplot_ratio_max_2nd.png"), width = 9, height = 6, units = "in", res = 200)
barplot(log10(ratio_max_to_2nd[order(ratio_max_to_2nd, decreasing = T)]), xaxt='n',
xlab = "Shared barcodes - MM vs EX450K EX150K (replicates)",
main = "log10 ratio max value vs 2nd")
abline(h = 1, col = "red")
dev.off()
barcodes_to_reassign_A <- names(ratio_max_to_2nd[ratio_max_to_2nd > 10]) # filtering
barcodes_to_reassign_A_names <- colnames(MM_E_replicates$counts_abund_matrices$pct_abund[barcodes_to_reassign_A,])[apply(X = MM_E_replicates$counts_abund_matrices$pct_abund[barcodes_to_reassign_A,],
MARGIN = 1, FUN = function(x){which.max(x)})]
names(barcodes_to_reassign_A) <- barcodes_to_reassign_A_names
saveRDS(object = barcodes_to_reassign_A, file = paste0(paperdata,"/barcodes_to_reassign_A.rds"))
pheatmap::pheatmap(mat = shared_MM_and_Es_with_replicates_mt, cluster_cols = F, cluster_rows = F,
scale = "none", filename = paste0(paperdata, "Shared_barcodes_MM_Es_logCPM_before_reassignment.png"),
main = paste0("Shared barcodes (",length(shared_MM_and_Es_with_replicates),") between MM and Expanded", "\n",
"logCPM counts"),
show_rownames = F,
width = 9, height = 6, color = rev(brewer.pal(n = 9, "Spectral")))
shared_MM_and_Es_with_replicates_mt_melt <- as.data.frame(melt(shared_MM_and_Es_with_replicates_mt))
png(filename = paste0(paperdata, "Shared_barcodes_MM_Es_logCPM_ggplot_before_reassignment.png"),
width = 9, height = 6, units = "in", res = 300)
ggplot(data = shared_MM_and_Es_with_replicates_mt_melt,
mapping = aes(x = Var2, y = Var1, fill = value)) +
geom_tile() + xlab("Samples") + ylab("Barcodes") +
ggtitle(paste0("Shared barcodes (",length(shared_MM_and_Es_with_replicates),") between MM and Expanded"), subtitle = "logCPM counts") +
theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5),
axis.text.y.left = element_blank(), axis.ticks.y.left = element_blank()) +
scale_fill_distiller(palette = "Spectral", direction = -1)
dev.off()
# re-running with re-assigning of shared barcodes between MM and Es ##
for(setting in c("MinPct")){
dir.create(path = setting, showWarnings = F)
if(setting == "Auto"){
custom.cut.param <- FALSE
}else{
custom.cut.param <- TRUE
}
if(setting == "MinCPM"){
MinlogCPMFilter.param <- TRUE
}else{
MinlogCPMFilter.param <- FALSE
}
for (SampleGroup in names(SampleIDs)) {
chartingBCs_light(bseq.fold = bseqdir, samplesheet = samplesheets_replicates[[setting]],
custom.cut = custom.cut.param, do.filter = T,
method = setting, bcodes2reassign = barcodes_to_reassign_A,
removeMaxN = TRUE,
MinlogCPMFilter = MinlogCPMFilter.param,
sampleids = SampleIDs[[SampleGroup]], ed = 2, mc = 3,
projectid = paste0(setting,"_",SampleGroup,"_MaxN_removal_reassignA"),
outfolder = paste(setting,paste0(SampleGroup,"_MaxN_removal","_seqtk_reassignA"),sep = "/"),
counts_vector = c(5,20,50,100,200,500,5000,20000),
pcts_vector = c(0.001,0.002,0.005,0.01,0.02,0.1,0.2,0.5,0.75,1,2))
}
}
### Create final plots ####
MM_E_replicates_reassigned <- readRDS(file = paste0(rootdir,"MinPct/MM_E_MaxN_removal_seqtk_reassignA/MinPct_MM_E_MaxN_removal_reassignA_objects.rds"))
## Adding new column to
mydata <- as.data.frame(table(MM_E_replicates_reassigned$fulldata$SampleID))
colnames(mydata) <- c("SampleID","nBarcodes")
groupannot <- MM_E_replicates_reassigned$sampleinfo[,c("SampleID","Group")]
mydata <- merge.data.frame(mydata, groupannot, by = "SampleID", all.x = T,sort = F)
mydata$Group <- gsub(x = mydata$Group, pattern = "E_lowdose", "EX-U150K")
mydata$Group <- gsub(x = mydata$Group, pattern = "E_highdose", "EX-U450K")
mydata$Group <- factor(mydata$Group, levels = c("MM","EX-U450K","EX-U150K"))
png(filename = paste0(paperdata,"/Number_of_unique_barcodes_MM_Es_post_reassignment.png"),
width = 4, height = 6, units = "in", res = 300)
ggplot(data = mydata, mapping = aes(Group, y = nBarcodes, color = Group,
label = paste0("(",nBarcodes,")"))) +
geom_boxplot(outlier.shape = NA, fatten = 1, lwd=1) +
geom_point(position=position_jitterdodge(seed = 500)) +
geom_text_repel(show_legend = FALSE, size = 3, seed = 500) +
scale_y_continuous(trans = "log10") +
ggtitle(label = "Unique barcodes MM and EXs", subtitle = "filter: % > 0.005") +
#theme_bw() +
theme(legend.position = "none", axis.title = element_blank(),
panel.background = element_rect(fill = "white"),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.line = element_line(linetype = 1))
dev.off()
# AIM: plotting BC sharing within group (MM, Elow and Ehigh) - boxplot
library(openxlsx)
library(ggplot2)
library(ggrepel)
# load input
Ehigh <- readRDS("MinPct/Ehigh_MaxN_removal_seqtk_reassignA/MinPct_Ehigh_MaxN_removal_reassignA_objects.rds")
Elow <- readRDS("MinPct/Elow_MaxN_removal_seqtk_reassignA/MinPct_Elow_MaxN_removal_reassignA_objects.rds")
MM <- readRDS("MinPct/MM_MaxN_removal_seqtk_reassignA/MinPct_MM_MaxN_removal_reassignA_objects.rds")
# Sharing intra group
sharing_intra <- list()
sharing_intra[["MM"]] <- read.table(file = "MinPct/MM_MaxN_removal_seqtk_reassignA/Tables/Sharing_MinPct_MM_MaxN_removal_reassignA.txt",
nrows = 3)
sharing_intra[["Ehigh"]] <- read.table(file = "MinPct/Ehigh_MaxN_removal_seqtk_reassignA/Tables/Sharing_MinPct_Ehigh_MaxN_removal_reassignA.txt",
nrows = 6)
sharing_intra[["Elow"]] <- read.table(file = "MinPct/Elow_MaxN_removal_seqtk_reassignA/Tables/Sharing_MinPct_Elow_MaxN_removal_reassignA.txt",
nrows = 6)
for(j in names(sharing_intra)){
sharing_intra[[j]] <- subset(sharing_intra[[j]], select = c("V2","V5"))
sharing_intra[[j]]$Group <- j
colnames(sharing_intra[[j]]) <- c("SampleID","pct.share_1vsAll","Group")
sharing_intra[[j]]$pct.share_1vsAll <- gsub(x = sharing_intra[[j]]$pct.share_1vsAll, pattern = "%", replacement = "")
}
df <- rbind(sharing_intra[["MM"]], sharing_intra[["Ehigh"]])
df <- rbind(df, sharing_intra[["Elow"]])
df$pct.share_1vsAll <- as.numeric(df$pct.share_1vsAll)
df$Group <- gsub(x = df$Group, pattern = "Elow", "EX-U150K")
df$Group <- gsub(x = df$Group, pattern = "Ehigh", "EX-U450K")
df$Group <- factor(df$Group, levels = c("MM","EX-U450K","EX-U150K"))
png("PaperData/Pct_sharing_1_vs_All_intragroup_MM_Es.png",
width = 4, height = 4, units = "in", res = 300)
ggplot(df, mapping = aes(x = Group, y = pct.share_1vsAll, color = Group, label = SampleID)) +
geom_boxplot(outlier.shape = NA) + geom_jitter(position = position_jitter(seed = 6), show.legend = F) +
geom_text_repel(position = position_jitter(seed = 6), show.legend = F) +
theme_bw() + ylab("% sharing") +
ggtitle(label = "Barcoding sharing intra-group", subtitle = "One vs All") +
theme(axis.title.x = element_blank(),
#legend.position = c(0.25, 0.8),
legend.position = "none",
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
dev.off()
write.xlsx(df, file = "PaperData/Pct_sharing_1_vs_All_intragroup_MM_Es.xlsx", asTable = T)
### Entropy measures #
MM_E_reassigned <- readRDS(file = paste0(rootdir, "MinPct/MM_E_MaxN_removal_seqtk_reassignA/MinPct_MM_E_MaxN_removal_reassignA_objects.rds"))
shannon_MM_E <- diversity(MM_E_reassigned$counts_abund_matrices$Raw, index = "shannon", MARGIN = 2)
shannon_MM_E_df <- as.data.frame(shannon_MM_E)
colnames(shannon_MM_E_df) <- "Shannon"
shannon <- shannon_MM_E_df
saveRDS(shannon, file = "PaperData/Shannon_diversity.rds")
SampleID Group Cutoff MaxN VCN Engraftment PctNGFR
MM1 MM 0.005 2 2.84 25.5 66.39
MM3 MM 0.005 2 1.61 52.77 95.53
MM4 MM 0.005 2 2.11 26.56 88.79
E09 E_highdose 0.005 3 1.86 36.76 81.17
E10 E_highdose 0.005 3 1.77 21.19 73.39
E11 E_highdose 0.005 3 2.07 36.88 92.76
E12 E_highdose 0.005 3 1.85 20.83 91.43
E13 E_highdose 0.005 3 2.01 43.92 97.08
E14 E_highdose 0.005 3 1.35 24.26 89.71
E16 E_lowdose 0.005 3 1.76 12.11 51.44
E17 E_lowdose 0.005 3 1.87 4.37 28.89
E18 E_lowdose 0.005 3 1.73 6.05 48.25
E19 E_lowdose 0.005 3 1.33 23.58 73.31
E20 E_lowdose 0.005 3 1.53 1.21 43.94
E21 E_lowdose 0.005 3 1.74 6.65 67.06
E09E14 E_highdose 0.005 4 1.86 30.51 90.57
E16E21 E_lowdose 0.005 4 1.73 6.35 49.845
MM MM 0.005 4 2.19 34.94 88.79
\ No newline at end of file
SampleID Group Cutoff MaxN VCN Engraftment PctNGFR
CG_UM_01 CG_UM 0.005 4 1.244 49.27 56.56
CG_UM_02 CG_UM 0.005 4 0.862 39.51 48.46
CG_UM_03 CG_UM 0.005 4 1.13 24.89 48.29
CG_UM_04 CG_UM 0.005 4 0.779 49.62 38.2
CG_UM_05 CG_UM 0.005 4 0.96 22 48.44
CG_UM_06 CG_UM 0.005 4 0.844 57.05 50.48
CG_UM_07 CG_UM 0.005 4 1.215 71.27 63.59
CG_UM_08 CG_UM 0.005 4 0.711 49.7 41.95
CG_UM_2nd_01 CG_UM_2nd 0 4 1 4.51 50.29
CG_UM_2nd_02 CG_UM_2nd 0 4 1 7.56 15.25
CG_UM_2nd_03 CG_UM_2nd 0 4 1 1.41 42.31
CG_UM_2nd_04 CG_UM_2nd 0 4 1 1.56 63.85
CG_UMSR_12 CG_UMSR 0.005 4 0.769 47.88 46.08
CG_UMSR_13 CG_UMSR 0.005 4 0.709 51.83 44.16
CG_UMSR_14 CG_UMSR 0.005 4 0.803 24.77 53.88
CG_UMSR_15 CG_UMSR 0.005 4 0.53 40.5 32.81
CG_UMSR_16 CG_UMSR 0.005 4 0.463 63.83 32.13
CG_UMSR_17 CG_UMSR 0.005 4 0.613 33.51 34.26
CG_UMSR_18 CG_UMSR 0.005 4 0.506 67.05 35.42
CG_UMSR_19 CG_UMSR 0.005 4 0.563 61.64 34.02
CG_UMSR_20 CG_UMSR 0.005 4 0.595 42.11 33.91
CG_UMSR_21 CG_UMSR 0.005 4 0.428 65.74 29.73
CG_UMSR_2nd_05 CG_UMSR_2nd 0 4 1 5.62 44.82
CG_UMSR_2nd_06 CG_UMSR_2nd 0 4 1 2.58 23.98
CG_UMSR_2nd_07 CG_UMSR_2nd 0 4 1 16.01 25.11
CG_UMSR_2nd_08 CG_UMSR_2nd 0 4 1 11.19 89.78
SampleID Group Cutoff MaxN VCN Engraftment PctNGFR
CG_UM_01 CG_UM 0 4 1.244 49.27 56.56
CG_UM_02 CG_UM 0 4 0.862 39.51 48.46
CG_UM_03 CG_UM 0 4 1.13 24.89 48.29
CG_UM_04 CG_UM 0 4 0.779 49.62 38.2
CG_UM_05 CG_UM 0 4 0.96 22 48.44
CG_UM_06 CG_UM 0 4 0.844 57.05 50.48
CG_UM_07 CG_UM 0 4 1.215 71.27 63.59
CG_UM_08 CG_UM 0 4 0.711 49.7 41.95
CG_UM_2nd_01 CG_UM_2nd 0 4 1 4.51 50.29
CG_UM_2nd_02 CG_UM_2nd 0 4 1 7.56 15.25
CG_UM_2nd_03 CG_UM_2nd 0 4 1 1.41 42.31
CG_UM_2nd_04 CG_UM_2nd 0 4 1 1.56 63.85
CG_UMSR_12 CG_UMSR 0 4 0.769 47.88 46.08
CG_UMSR_13 CG_UMSR 0 4 0.709 51.83 44.16
CG_UMSR_14 CG_UMSR 0 4 0.803 24.77 53.88
CG_UMSR_15 CG_UMSR 0 4 0.53 40.5 32.81
CG_UMSR_16 CG_UMSR 0 4 0.463 63.83 32.13
CG_UMSR_17 CG_UMSR 0 4 0.613 33.51 34.26
CG_UMSR_18 CG_UMSR 0 4 0.506 67.05 35.42
CG_UMSR_19 CG_UMSR 0 4 0.563 61.64 34.02
CG_UMSR_20 CG_UMSR 0 4 0.595 42.11 33.91
CG_UMSR_21 CG_UMSR 0 4 0.428 65.74 29.73
CG_UMSR_2nd_05 CG_UMSR_2nd 0 4 1 5.62 44.82
CG_UMSR_2nd_06 CG_UMSR_2nd 0 4 1 2.58 23.98
CG_UMSR_2nd_07 CG_UMSR_2nd 0 4 1 16.01 25.11
CG_UMSR_2nd_08 CG_UMSR_2nd 0 4 1 11.19 89.78
# Run Analysis CG (Cell growth) samples
library(vegan)
library(dplyr)
source(file = "../ChartingBCs_Light.R")
library(RColorBrewer)
library(ggrepel)
paperdata <- "PaperData/"
dir.create(path = paperdata)
rootdir <- "./"
bseqdir <- paste0(rootdir,"Input/")
# using only the minpct version - here renamed simply to samplesheet
samplesheets <- list(MinPct = paste0(bseqdir,"SCGM_samplesheet.txt"))
# SampleIDs / groups present in the paper
# CG_UMSR : CG_UMSR_12-21
# CG_UM : CG_UM_01-08
# CG_UM_2nd : CG_UM_2nd_01-04
# CG_UMSR_2nd : CG_UMSR_2nd_05-08
CG_UMSR = c("CG_UMSR_12","CG_UMSR_13","CG_UMSR_14","CG_UMSR_15","CG_UMSR_16","CG_UMSR_17",
"CG_UMSR_18","CG_UMSR_19","CG_UMSR_20","CG_UMSR_21")
CG_UM = c("CG_UM_01","CG_UM_02","CG_UM_03","CG_UM_04",
"CG_UM_05","CG_UM_06","CG_UM_07","CG_UM_08")
CG_UM_2nd = c("CG_UM_2nd_01","CG_UM_2nd_02","CG_UM_2nd_03","CG_UM_2nd_04")
CG_UMSR_2nd = c("CG_UMSR_2nd_05","CG_UMSR_2nd_06","CG_UMSR_2nd_07","CG_UMSR_2nd_08")
SampleIDs <- list(CG_UMSR = CG_UMSR,
CG_UM = CG_UM,
CG_UMSR_2nd = CG_UMSR_2nd,
CG_UM_2nd = CG_UM_2nd,
UM = c(CG_UM,CG_UM_2nd),
UMSR = c(CG_UMSR,CG_UMSR_2nd),
UM_UMSR_1st = c(CG_UMSR,CG_UM),
UM_UMSR_2nd = c(CG_UMSR_2nd,CG_UM_2nd)
)
#### Basic analysis ####
for(setting in c("MinPct")){
dir.create(path = setting, showWarnings = F)
if(setting == "Auto"){
custom.cut.param <- FALSE
}else{
custom.cut.param <- TRUE
}
if(setting == "MinCPM"){
MinlogCPMFilter.param <- TRUE
}else{
MinlogCPMFilter.param <- FALSE
}
for (SampleGroup in names(SampleIDs)) {
chartingBCs_light(bseq.fold = bseqdir, samplesheet = samplesheets[[setting]],
custom.cut = custom.cut.param, do.filter = T,
method = setting,
removeMaxN = TRUE,
MinlogCPMFilter = MinlogCPMFilter.param,
sampleids = SampleIDs[[SampleGroup]], ed = 2, mc = 3,
projectid = paste0(setting,"_",SampleGroup,"_MaxN_removal"),
outfolder = paste(setting,paste0(SampleGroup,"_MaxN_removal","_seqtk"),sep = "/"),
counts_vector = c(5,20,50,100,200,500,5000,20000),
pcts_vector = c(0.001,0.002,0.005,0.01,0.02,0.1,0.2,0.5,0.75,1,2))
}
}
# Defining Barcodes to re-assign using the 2 group dataset with CG (UM and UMSR) ####
MinPct_UM_UMSR_1st_MaxN_removal_objects <- readRDS("MinPct/UM_UMSR_1st_MaxN_removal_seqtk/MinPct_UM_UMSR_1st_MaxN_removal_objects.rds")
tmp.obj <- MinPct_UM_UMSR_1st_MaxN_removal_objects
# potentially to re-assign
shared_CG_UM_UMSR_first <- rownames(tmp.obj$sharingbygroup$CG_UM_vs_CG_UMSR)
saveRDS(object = shared_CG_UM_UMSR_first, paste0(paperdata,"PaperData/shared_CG_UM_UMSR_first_step01.rds"))
shared_CG_UM_UMSR_first_mt <- tmp.obj$counts_abund_matrices$logCPM[shared_CG_UM_UMSR_first,c(CG_UM,CG_UMSR)]
shared_CG_UM_UMSR_first_mt <- as.matrix(arrange_all(shared_CG_UM_UMSR_first_mt,desc))
shared_CG_UM_UMSR_first_mt_pcts <- tmp.obj$counts_abund_matrices$pct_abund[shared_CG_UM_UMSR_first,c(CG_UM,CG_UMSR)]
shared_CG_UM_UMSR_first_mt_pcts <- as.matrix(arrange_all(shared_CG_UM_UMSR_first_mt_pcts,desc))
# exclude BCs that are shared within group from the list of BCs to be re-assigned ####
shared_within_CG_UM <- rownames(subset(tmp.obj$aggregationbygroup_melt, subset = CG_UM > 1))
shared_within_CG_UMSR <- rownames(subset(tmp.obj$aggregationbygroup_melt, subset = CG_UMSR > 1))
writeLines(text = shared_within_CG_UM, con = paste0(paperdata,"/Shared_barcodes_within_CG_UM.txt"))
writeLines(text = shared_within_CG_UMSR, con = paste0(paperdata,"/Shared_barcodes_within_CG_UMSR.txt"))
# from the potentially BCs to be re-assigned exclude the one that are also shared within group
index_to_retain <- which(!rownames(shared_CG_UM_UMSR_first_mt_pcts) %in% union(shared_within_CG_UMSR, shared_within_CG_UM)) # if not intragroup sharing is affected
shared_CG_UM_UMSR_first_mt_pcts <- shared_CG_UM_UMSR_first_mt_pcts[index_to_retain,]
shared_CG_UM_UMSR_first_mt <- shared_CG_UM_UMSR_first_mt[index_to_retain,]
# adding new method of re-assignment based on std dev
sds <- apply(shared_CG_UM_UMSR_first_mt_pcts, MARGIN = 1, FUN = function(x) {sd(x[x > 0])})
maxs <- apply(shared_CG_UM_UMSR_first_mt_pcts, MARGIN = 1, FUN = max)
second <- apply(shared_CG_UM_UMSR_first_mt_pcts, MARGIN = 1, FUN = function(x) {sort(x, decreasing = T)[2]})
means <- apply(shared_CG_UM_UMSR_first_mt_pcts, MARGIN = 1, FUN = mean)
ratio_max_to_2nd <- apply(shared_CG_UM_UMSR_first_mt_pcts,
MARGIN = 1, FUN = max) / apply(shared_CG_UM_UMSR_first_mt_pcts,
MARGIN = 1, FUN = function(x) {sort(x, decreasing = T)[2]})
mydf_weightedratio <- as.data.frame(ratio_max_to_2nd)
colnames(mydf_weightedratio) <- "ratioMax2Second"
mydf_weightedratio$weighted <- mydf_weightedratio$ratioMax2Second * means
mydf_weightedratio$logged <- log10(mydf_weightedratio$weighted)
reassignment_rule <- ifelse(mydf_weightedratio$logged > -1, 1, 0)
names(reassignment_rule) <- rownames(mydf_weightedratio)
saveRDS(object = ratio_max_to_2nd, file = paste0(paperdata,"/ratio_max_to_2nd_CG_UM_UMSR_first.rds"))
saveRDS(object = mydf_weightedratio, file = paste0(paperdata,"/mydf_weightedratio_CG_UM_UMSR_first.rds"))
saveRDS(object = means, file = paste0(paperdata,"/means_CG_UM_UMSR_first.rds"))
saveRDS(object = reassignment_rule, file = paste0(paperdata,"/reassignment_rule_CG_UM_UMSR_first.rds"))
####
barcodes_to_reassign <- names(ratio_max_to_2nd[ratio_max_to_2nd > 10]) # filtering
barcodes_to_reassign_names <- colnames(tmp.obj$counts_abund_matrices$pct_abund[barcodes_to_reassign,])[apply(X = tmp.obj$counts_abund_matrices$pct_abund[barcodes_to_reassign,], MARGIN = 1, FUN = function(x){which.max(x)})]
names(barcodes_to_reassign) <- barcodes_to_reassign_names
saveRDS(object = barcodes_to_reassign, file = "PaperData/barcodes_to_reassign_CG_UM_UMSR_first.rds")
# re-running with re-assigning of shared barcodes between CG_UM and CG_UMSR ##
# Note: barcodes shared also within condition are removed from re-assignment ####
for(setting in c("MinPct")){
dir.create(path = setting, showWarnings = F)
if(setting == "Auto"){
custom.cut.param <- FALSE
}else{
custom.cut.param <- TRUE
}
if(setting == "MinCPM"){
MinlogCPMFilter.param <- TRUE
}else{
MinlogCPMFilter.param <- FALSE
}
for (SampleGroup in names(SampleIDs[c(1,2,5,6,7)])) { # pickup only groups that contain 1st transplant samples, exclude only 2nd
chartingBCs_light(bseq.fold = bseqdir, samplesheet = samplesheets[[setting]], custom.cut = custom.cut.param, do.filter = T,
method = setting, bcodes2reassign = barcodes_to_reassign,
removeMaxN = FALSE, MinlogCPMFilter = MinlogCPMFilter.param,
sampleids = SampleIDs[c(1,2,5,6,7)][[SampleGroup]],
ed = 2, mc = 3,
projectid = paste0(setting,"_",SampleGroup),
outfolder = paste(setting, paste0(SampleGroup,"_seqtk_reassign"),sep = "/"),
counts_vector = c(5,20,50,100,200,500,5000,20000),
pcts_vector = c(0.001,0.002,0.005,0.01,0.02,0.1,0.2,0.5,0.75,1,2))
}
for (SampleGroup in names(SampleIDs[c(1,2,5,6,7)])) { # pickup only groups that contain 1st transplant samples
chartingBCs_light(bseq.fold = bseqdir, samplesheet = samplesheets[[setting]],
custom.cut = custom.cut.param, do.filter = T,
method = setting, bcodes2reassign = barcodes_to_reassign,
removeMaxN = TRUE,
MinlogCPMFilter = MinlogCPMFilter.param,
sampleids = SampleIDs[c(1,2,5,6,7)][[SampleGroup]], ed = 2, mc = 3,
projectid = paste0(setting,"_",SampleGroup,"_MaxN_removal"),
outfolder = paste(setting,paste0(SampleGroup,"_MaxN_removal","_seqtk_reassign"),sep = "/"),
counts_vector = c(5,20,50,100,200,500,5000,20000),
pcts_vector = c(0.001,0.002,0.005,0.01,0.02,0.1,0.2,0.5,0.75,1,2))
}
}
### Create final plots ####
UM_UMSR_1st_reassigned <- readRDS(file = paste0(rootdir,"MinPct/UM_UMSR_1st_MaxN_removal_seqtk_reassign/MinPct_UM_UMSR_1st_MaxN_removal_objects.rds"))
# look at numbers #
table(UM_UMSR_1st_reassigned$fulldata$Group)
# CG_UM CG_UMSR
# 4466 5127
## Adding new column to
mydata <- as.data.frame(table(UM_UMSR_1st_reassigned$fulldata$SampleID))
colnames(mydata) <- c("SampleID","nBarcodes")
mydata$Group <- c(rep(x = "CG_UM", 8), rep("CG_UMSR",10))
png(filename = "PaperData/Number_of_unique_barcodes_CG_UM_UMSR_post_reassignment.png",
width = 9, height = 6, units = "in", res = 300)
ggplot(data = mydata, mapping = aes(Group, y = nBarcodes, color = Group,
label = paste0(SampleID,"_",nBarcodes))) +
geom_point() + geom_label_repel(show_guide = FALSE) +
ggtitle(label = "Unique barcodes CG_UM and CG_UMSR", subtitle = "filter: % > 0.005") +
theme(legend.position = "none", axis.title = element_blank(),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
dev.off()
# shared barcodes after reassignment ####
UM_UMSR_1st_reassigned_shared <- rownames(UM_UMSR_1st_reassigned$sharingbygroup$CG_UM_vs_CG_UMSR)
UM_UMSR_1st_reassigned_shared_mt <- UM_UMSR_1st_reassigned$counts_abund_matrices$logCPM[UM_UMSR_1st_reassigned_shared,c(CG_UM,CG_UMSR)]
UM_UMSR_1st_reassigned_shared_mt <- as.matrix(arrange_all(UM_UMSR_1st_reassigned_shared_mt))
UM_UMSR_1st_reassigned_shared_mt_melt <- as.data.frame(melt(UM_UMSR_1st_reassigned_shared_mt))
colnames(UM_UMSR_1st_reassigned_shared_mt_melt) <- c("Barcode","SampleID","logCPM")
fulldata_subset <- subset(UM_UMSR_1st_reassigned$fulldata, select = c("Barcode", "SampleID", "Rank"),
subset = Barcode %in% UM_UMSR_1st_reassigned_shared_mt_melt$Barcode)
UM_UMSR_1st_reassigned_shared_mt_melt_rank <- merge.data.frame(UM_UMSR_1st_reassigned_shared_mt_melt, fulldata_subset, sort = F)
png(filename = paste0(paperdata, "Shared_barcodes_UM_UMSR_1st_logCPM_ggplot_post_reassignment.png"),
width = 6, height = 6, units = "in", res = 300)
ggplot(data = UM_UMSR_1st_reassigned_shared_mt_melt_rank,
mapping = aes(x = SampleID, y = Barcode, fill = logCPM, label = Rank)) +
geom_tile() + xlab("Samples") + ylab("Barcodes") + geom_text(size = 3) +
#scale_x_discrete(guide = guide_axis(n.dodge=3)) +
ggtitle(paste0("Shared barcodes (",length(UM_UMSR_1st_reassigned_shared),") between CG_UM and CG_UMSR"), subtitle = "logCPM counts (rank)") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
axis.text.y.left = element_blank(), axis.ticks.y.left = element_blank()) +
scale_fill_distiller(palette = "Spectral", direction = -1)
dev.off()
# shared barcodes after reassignment ####
tmp2_data <- rownames(do.call(what = rbind, UM_UMSR_1st_reassigned$sharingbygroup))
CG_UM_UMSR_1st_reassigned_shared <- unique(gsub(x = tmp2_data, pattern = ".*\\.", replacement = ""))
CG_UM_UMSR_1st_reassigned_shared_mt <- UM_UMSR_1st_reassigned$counts_abund_matrices$logCPM[CG_UM_UMSR_1st_reassigned_shared,c(CG_UM,CG_UMSR)]
CG_UM_UMSR_1st_reassigned_shared_mt <- as.matrix(arrange_all(CG_UM_UMSR_1st_reassigned_shared_mt))
CG_UM_UMSR_1st_reassigned_shared_mt_melt <- as.data.frame(melt(CG_UM_UMSR_1st_reassigned_shared_mt))
colnames(CG_UM_UMSR_1st_reassigned_shared_mt_melt) <- c("Barcode","SampleID","logCPM")
fulldata_subset <- subset(UM_UMSR_1st_reassigned$fulldata, select = c("Barcode", "SampleID", "Rank"),
subset = Barcode %in% CG_UM_UMSR_1st_reassigned_shared_mt_melt$Barcode)
CG_UM_UMSR_1st_reassigned_shared_mt_melt_rank <- merge.data.frame(CG_UM_UMSR_1st_reassigned_shared_mt_melt, fulldata_subset, sort = F)
png(filename = paste0(paperdata, "Shared_barcodes_CG_UM_UMSR_1st_logCPM_ggplot_post_reassignment.png"),
width = 6, height = 6, units = "in", res = 300)
ggplot(data = CG_UM_UMSR_1st_reassigned_shared_mt_melt_rank,
mapping = aes(x = SampleID, y = Barcode, fill = logCPM, label = Rank)) +
geom_tile() + xlab("Samples") + ylab("Barcodes") + geom_text(size = 3) +
ggtitle(paste0("Shared barcodes (",length(CG_UM_UMSR_1st_reassigned_shared),") between CG UM and UMSR"), subtitle = "logCPM counts (rank)") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
axis.text.y.left = element_blank(), axis.ticks.y.left = element_blank()) +
scale_fill_distiller(palette = "Spectral", direction = -1)
dev.off()
library(openxlsx)
library(ggplot2)
library(ggrepel)
# sharing within groups
sharing_intra <- list()
sharing_intra[["CG_UM"]] <- read.table(file = "MinPct/CG_UM_MaxN_removal_seqtk_reassign/Tables/Sharing_MinPct_CG_UM_MaxN_removal.txt",
nrows = 8)
sharing_intra[["CG_UMSR"]] <- read.table(file = "MinPct/CG_UMSR_MaxN_removal_seqtk_reassign/Tables/Sharing_MinPct_CG_UMSR_MaxN_removal.txt",
nrows = 10)
for(j in names(sharing_intra)){
sharing_intra[[j]] <- subset(sharing_intra[[j]], select = c("V2","V5"))
sharing_intra[[j]]$Group <- j
colnames(sharing_intra[[j]]) <- c("SampleID","pct.share_1vsAll","Group")
sharing_intra[[j]]$pct.share_1vsAll <- gsub(x = sharing_intra[[j]]$pct.share_1vsAll, pattern = "%", replacement = "")
}
df <- rbind(sharing_intra[["CG_UM"]], sharing_intra[["CG_UMSR"]])
df$pct.share_1vsAll <- as.numeric(df$pct.share_1vsAll)
df$Group <- gsub(x = df$Group, pattern = "UM",replacement = "U")
df$Group <- gsub(x = df$Group, pattern = "SR",replacement = "S")
df$Group <- factor(df$Group, levels = c("CG_U", "CG_US"))
write.xlsx(df, file = paste0(paperdata,"/Pct_sharing_1_vs_All_intragroup_CG_UM_and_UMSR.xlsx"),
asTable = T)
### Entropy measures #
shannon_ALL <- diversity(UM_UMSR_1st_reassigned$counts_abund_matrices$Raw, index = "shannon", MARGIN = 2)
shannon_ALL_df <- as.data.frame(shannon_ALL)
colnames(shannon_ALL_df) <- "Shannon"
saveRDS(shannon_ALL_df, paste0(paperdata, "Shannons.rds"))
library(ggplot2)
library(vegan)
library(pheatmap)
library(vegan)
library(dplyr)
source(file = "../ChartingBCs_Light.R")
library(RColorBrewer)
library(ggrepel)
library(ggalluvial)
paperdata <- "PaperData/"
dir.create(path = paperdata)
rootdir <- "./"
bseqdir <- paste0(rootdir,"Input/")
# using only the minpct version - here renamed simply to samplesheet
samplesheets <- list(MinPct = paste0(bseqdir,"SCGM_samplesheet_1st_2nd.txt"))
# SampleIDs / groups present in the paper
# CG_UMSR : CG_UMSR_12-21
# CG_UM : CG_UM_01-08
# CG_UM_2nd : CG_UM_2nd_01-04
# CG_UMSR_2nd : CG_UMSR_2nd_05-08
CG_UMSR = c("CG_UMSR_12","CG_UMSR_13","CG_UMSR_14","CG_UMSR_15","CG_UMSR_16","CG_UMSR_17",
"CG_UMSR_18","CG_UMSR_19","CG_UMSR_20","CG_UMSR_21")
CG_UM = c("CG_UM_01","CG_UM_02","CG_UM_03","CG_UM_04",
"CG_UM_05","CG_UM_06","CG_UM_07","CG_UM_08")
CG_UM_2nd = c("CG_UM_2nd_01","CG_UM_2nd_02","CG_UM_2nd_03","CG_UM_2nd_04")
CG_UMSR_2nd = c("CG_UMSR_2nd_05","CG_UMSR_2nd_06","CG_UMSR_2nd_07","CG_UMSR_2nd_08")
SampleIDs <- list(UM = c(CG_UM,CG_UM_2nd),
UMSR = c(CG_UMSR,CG_UMSR_2nd)
)
#### Basic analysis ####
for(setting in c("MinPct")){
dir.create(path = setting, showWarnings = F)
if(setting == "Auto"){
custom.cut.param <- FALSE
}else{
custom.cut.param <- TRUE
}
if(setting == "MinCPM"){
MinlogCPMFilter.param <- TRUE
}else{
MinlogCPMFilter.param <- FALSE
}
for (SampleGroup in names(SampleIDs)) {
chartingBCs_light(bseq.fold = bseqdir, samplesheet = samplesheets[[setting]], custom.cut = custom.cut.param, do.filter = T,
method = setting,
removeMaxN = FALSE, MinlogCPMFilter = MinlogCPMFilter.param,
sampleids = SampleIDs[[SampleGroup]],
ed = 2, mc = 3,
projectid = paste0(setting,"_noFILT_",SampleGroup),
outfolder = paste(paste0(setting,"_noFILT/"), paste0(SampleGroup,"_seqtk"),sep = "/"),
counts_vector = c(5,20,50,100,200,500,5000,20000),
pcts_vector = c(0.001,0.002,0.005,0.01,0.02,0.1,0.2,0.5,0.75,1,2))
}
for (SampleGroup in names(SampleIDs)) {
chartingBCs_light(bseq.fold = bseqdir, samplesheet = samplesheets[[setting]],
custom.cut = custom.cut.param, do.filter = T,
method = setting,
removeMaxN = TRUE,
MinlogCPMFilter = MinlogCPMFilter.param,
sampleids = SampleIDs[[SampleGroup]], ed = 2, mc = 3,
projectid = paste0(setting,"_noFILT_",SampleGroup,"_MaxN_removal"),
outfolder = paste(paste0(setting,"_noFILT/"), paste0(SampleGroup,"_MaxN_removal","_seqtk"),sep = "/"),
counts_vector = c(5,20,50,100,200,500,5000,20000),
pcts_vector = c(0.001,0.002,0.005,0.01,0.02,0.1,0.2,0.5,0.75,1,2))
}
}
### Plotting Sharing 1st and 2nd ####
CG_UM_unfiltered <- readRDS(file = paste0(rootdir,"MinPct_noFILT/UM_MaxN_removal_seqtk/MinPct_noFILT_UM_MaxN_removal_objects.rds"))
shared_elements <- CG_UM_unfiltered$counts_abund_matrices$pct_abund[rownames(CG_UM_unfiltered$sharingbygroup$CG_UM_vs_CG_UM_2nd),]
shared_elements$Barcode <- rownames(shared_elements)
shared_elements_melt <- reshape2::melt(data = shared_elements, id.vars = c("Barcode"))
shared_elements_melt_annot <- merge.data.frame(shared_elements_melt,
CG_UM_unfiltered$sampleinfo,
by.x = "variable", by.y = "SampleID",
all.x = T, sort = F)
shared_elements_melt_annot$variable <- gsub(x = shared_elements_melt_annot$variable, pattern = "CG_UM_", replacement = "")
shared_elements_melt_annot$value[which(shared_elements_melt_annot$value == 0)] <- NA
shared_elements_melt_annot$Group <- gsub(x = shared_elements_melt_annot$Group, pattern = "UM",replacement = "U")
shared_elements_melt_annot$Group <- factor(shared_elements_melt_annot$Group, levels = c("CG_U", "CG_U_2nd"))
png(filename = paste0(paperdata,"CG_UM_1st_2nd_shared_heatmap_full.png"),width = 8, height = 6, units = "in", res = 300)
ggplot(data = shared_elements_melt_annot, mapping = aes(y = Barcode, x = variable, fill = value, label = round(value,4))) +
geom_tile() +
geom_text(size = 2, color = "black") +
facet_grid( . ~ Group, scale = "free", space = "free") +
scale_x_discrete(guide = guide_axis(n.dodge=2)) +
scale_fill_distiller(palette = "Spectral", direction = -1, na.value = "white") +
theme_bw() +
theme(axis.title = element_blank(), axis.text = element_text(size = 10),
legend.position = "none",
strip.text = element_text(size = 10, face = "bold"),
legend.title = element_blank(),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)
) +
ggtitle(label = "Shared barcodes between 1st and 2nd transplant", subtitle = "CellGro Cell Culture Medium") +
geom_rect(data = data.frame(Group = 'CG_U'),
aes(xmin=c(0.5), xmax=c(1.5), ymin=c(24), ymax=c(26)),
colour="red", fill="transparent", size=1, inherit.aes = FALSE)
dev.off()
CG_UMSR_unfiltered <- readRDS(file = paste0(rootdir,"MinPct_noFILT/UMSR_MaxN_removal_seqtk/MinPct_noFILT_UMSR_MaxN_removal_objects.rds"))
shared_elements <- CG_UMSR_unfiltered$counts_abund_matrices$pct_abund[rownames(CG_UMSR_unfiltered$sharingbygroup$CG_UMSR_vs_CG_UMSR_2nd),]
shared_elements$Barcode <- rownames(shared_elements)
shared_elements_melt <- reshape2::melt(data = shared_elements, id.vars = c("Barcode"))
shared_elements_melt_annot <- merge.data.frame(shared_elements_melt,
CG_UMSR_unfiltered$sampleinfo,
by.x = "variable", by.y = "SampleID",
all.x = T, sort = F)
shared_elements_melt_annot$variable <- gsub(x = shared_elements_melt_annot$variable, pattern = "CG_UMSR_", replacement = "")
shared_elements_melt_annot$value[which(shared_elements_melt_annot$value == 0)] <- NA
shared_elements_melt_annot$Group <- gsub(x = shared_elements_melt_annot$Group, pattern = "UM",replacement = "U")
shared_elements_melt_annot$Group <- gsub(x = shared_elements_melt_annot$Group, pattern = "SR",replacement = "S")
shared_elements_melt_annot$Group <- factor(shared_elements_melt_annot$Group, levels = c("CG_US", "CG_US_2nd"))
png(filename = paste0(paperdata,"CG_UMSR_1st_2nd_shared_heatmap_full.png"),width = 8, height = 6, units = "in", res = 300)
ggplot(data = shared_elements_melt_annot, mapping = aes(y = Barcode, x = variable, fill = value, label = round(value,4))) +
geom_tile() +
geom_text(size = 2, color = "black") +
facet_grid( . ~ Group, scale = "free", space = "free") +
scale_x_discrete(guide = guide_axis(n.dodge=2)) +
scale_fill_distiller(palette = "Spectral", direction = -1, na.value = "white") +
#scale_fill_viridis_c(alpha = 0.9,option = "A",
# breaks = c(0,0.01, 0.05, 0.1, 0.5, 1,2,5,10,20,30,50,80,100), direction = -1)
theme_bw() +
theme(axis.title = element_blank(), axis.text = element_text(size = 8),
legend.position = "none",
strip.text = element_text(size = 10, face = "bold"),
legend.title = element_blank(),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)
) +
ggtitle(label = "Shared barcodes between 1st and 2nd transplant", subtitle = "CellGro Cell Culture MediUMSR") +
geom_rect(data = data.frame(Group = 'CG_US'),
aes(xmin=c(6.5), xmax=c(7.5), ymin=c(0.5), ymax=c(1.5)),
colour="red", fill="transparent", size=1, inherit.aes = FALSE)
dev.off()
# Evaluate N° of clones in 1st and 2nd transplant ####
mydataU <- as.data.frame(table(CG_UM_unfiltered$fulldata$SampleID))
colnames(mydataU) <- c("SampleID","nBarcodes")
groupannot <- CG_UM_unfiltered$sampleinfo[,c("SampleID","Group")]
mydataU <- merge.data.frame(mydataU, groupannot, by = "SampleID", all.x = T,sort = F)
mydataUS <- as.data.frame(table(CG_UMSR_unfiltered$fulldata$SampleID))
colnames(mydataUS) <- c("SampleID","nBarcodes")
groupannot <- CG_UMSR_unfiltered$sampleinfo[,c("SampleID","Group")]
mydataUS <- merge.data.frame(mydataUS, groupannot, by = "SampleID", all.x = T,sort = F)
mydata <- rbind(mydataU, mydataUS)
mydata$Group <- gsub(x = mydata$Group, pattern = "UM",replacement = "U")
mydata$Group <- gsub(x = mydata$Group, pattern = "SR",replacement = "S")
mydata$Group <- factor(x = mydata$Group, levels = c("CG_U","CG_US","CG_U_2nd","CG_US_2nd"))
png(filename = "PaperData/Number_of_unique_barcodes_CG_1st_2nd.png",
width = 3, height = 4, units = "in", res = 300)
ggplot(data = mydata, mapping = aes(Group, y = nBarcodes, color = Group,
label = paste0("(",nBarcodes,")"))) +
geom_boxplot(outlier.shape = NA, fatten = 0.4, lwd=0.8) +
scale_x_discrete(guide = guide_axis(n.dodge=2)) +
geom_point(position=position_jitterdodge(seed = 500, jitter.width = 0.8)) +
geom_text_repel(show_legend = FALSE, size = 3, seed = 500) +
ggtitle(label = "n° barcodes (1st and 2nd)", subtitle = "no filtering") +
theme(legend.position = "none", axis.title = element_blank(),
panel.background = element_rect(fill = "white"),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.line = element_line(linetype = 1))
dev.off()
png(filename = "PaperData/Number_of_unique_barcodes_CG_2nd.png",
width = 2, height = 4, units = "in", res = 300)
ggplot(data = subset(mydata, subset = Group %in% c("CG_U_2nd","CG_US_2nd")),
mapping = aes(Group, y = nBarcodes, color = Group,
label = paste0("(",nBarcodes,")"))) +
scale_fill_manual(values = c("#357BCF","#8DD3FB")) +
scale_color_manual(values = c("#357BCF","#8DD3FB")) +
geom_boxplot(outlier.shape = NA, fatten = 0.8, lwd=0.8) +
scale_x_discrete(guide = guide_axis(n.dodge=2)) +
geom_point(position=position_jitterdodge(seed = 500, jitter.width = 0.8)) +
ggtitle(label = "n° barcodes 2nd", subtitle = "No filtering") +
theme(panel.background = element_rect(fill = "white"),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.line = element_line(linetype = 1),
legend.position = "none",
legend.box.background = element_blank(),
legend.key = element_rect(fill = "white"),
legend.text = element_text(size = 8),
legend.title = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 12, color = "black"),
axis.text.x = element_text(angle = 0, size = 12, color = "black"),
strip.text = element_text(size = 8))
dev.off()
##### Creating plots ofr showing the clonal structure in 1st vs 2nd ###
# Idea: subdivide in bins of abundance and plots classes in 1st and second on shared elements only
library(ggalluvial)
first_second_sharing_fold <- paste0(paperdata, "1st_vs_2nd_sharing/")
dir.create(first_second_sharing_fold)
clonal_structure_1st_2nd <- list(objects = list(),
shared_elements = list(),
shared_elements_grouped = list())
for(group in c("UM","UMSR")){
clonal_structure_1st_2nd[["objects"]][[group]] <- readRDS(file = paste0(rootdir,"MinPct_noFILT/",group,"_MaxN_removal_seqtk/MinPct_noFILT_",group,"_MaxN_removal_objects.rds"))
shared_elements <- clonal_structure_1st_2nd[["objects"]][[group]]$counts_abund_matrices$pct_abund[rownames(clonal_structure_1st_2nd[["objects"]][[group]]$sharingbygroup[[paste0("CG_",group,"_vs_CG_",group,"_2nd")]]),]
shared_elements$Barcode <- rownames(shared_elements)
shared_elements_melt <- reshape2::melt(data = shared_elements, id.vars = c("Barcode"))
shared_elements_melt_annot <- merge.data.frame(shared_elements_melt,
clonal_structure_1st_2nd[["objects"]][[group]]$sampleinfo,
by.x = "variable", by.y = "SampleID",
all.x = T, sort = F)
shared_elements_melt_annot$variable <- gsub(x = shared_elements_melt_annot$variable, pattern = paste0("CG_",group,"_"), replacement = "")
shared_elements_melt_annot$value[which(shared_elements_melt_annot$value == 0)] <- NA
clonal_structure_1st_2nd[["shared_elements"]][[group]] <- shared_elements_melt_annot
clonal_structure_1st_2nd[["shared_elements_grouped"]][[group]] <- clonal_structure_1st_2nd[["shared_elements"]][[group]] %>%
group_by(Group, Barcode) %>%
summarise(mean_val = mean(value, na.rm = T),
mean_VCN = mean(VCN, na.rm = T),
mean_Engraftment = mean(Engraftment, na.rm = T),
mean_PctNGFR = mean(PctNGFR, na.rm = T))
clonal_structure_1st_2nd[["shared_elements_grouped"]][[group]]$Category <- cut(x = clonal_structure_1st_2nd[["shared_elements_grouped"]][[group]]$mean_val,
breaks = c(0,0.1,1,100),
labels = c("rare (0 - 0.1%)",
"medium (0.1 - 1%)",
"expanded ( > 1%)"))
png(filename = paste0(first_second_sharing_fold,"clonal_structure_1st_vs_2nd_",group,"_grouped_no0s.png"),
width = 6, height = 6, units = "in", res = 300)
print(ggplot(clonal_structure_1st_2nd[["shared_elements_grouped"]][[group]],
mapping = aes(Group)) +
geom_bar(mapping = aes(fill = Category), position = "fill") +
ggtitle("Shared barcoded 1st vs 2nd",
subtitle = paste0("Abundance levels (CG-",group,") - grouped mean no 0s")) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)))
dev.off()
# Create an alluvial plot of this aggregated barcodes abundances between 1st and 2nd
allbcs <- clonal_structure_1st_2nd[["shared_elements_grouped"]][[group]]$Barcode
colpalette <- colorRampPalette(RColorBrewer::brewer.pal(12, name = "Paired"))(length(unique(allbcs)))
png(filename = paste0(first_second_sharing_fold, "clonal_structure_1st_vs_2nd_",group,"_grouped_alluvium_track_no0s.png"),
width = 6, height = 6, units = "in", res = 300)
print(ggplot(data = clonal_structure_1st_2nd[["shared_elements_grouped"]][[group]],
aes(x = Group, y = mean_val, alluvium = Barcode, label = Barcode)) +
ylab("Mean abundance %") +
geom_alluvium(aes(fill = Barcode),
alpha = .75, decreasing = FALSE, width = 1/4) +
geom_stratum(aes(stratum = Barcode, fill = Barcode), alpha = .50, decreasing = FALSE, width = 1/4) +
scale_fill_manual(values = alpha(colour = colpalette, alpha = 0.50)) +
theme_bw() +
ggtitle(label = "Shared barcodes tracking", subtitle = paste0("CG-", group, " aggregation by group - mean no 0s")) +
theme(legend.position = "none", plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)))
dev.off()
}
saveRDS(object = clonal_structure_1st_2nd, file = paste0(paperdata,"/1st_vs_2nd_sharing/clonal_structure_1st_2nd_mean_no0s.rds")
library(openxlsx)
write.xlsx(x = clonal_structure_1st_2nd$shared_elements_grouped,
file = paste0(paperdata"/1st_vs_2nd_sharing/clonal_structure_1st_2nd_grouped_sharing_mean_no0s.xlsx"), asTable = T)
# Combine image - larger size ####
DataAnalysis <- readRDS(paste0(paperdata,"/1st_vs_2nd_sharing/clonal_structure_1st_2nd_mean_no0s.rds"))
for(i in names(DataAnalysis$shared_elements_grouped)){
DataAnalysis$shared_elements_grouped[[i]]$Condition <- i
}
mydata <- rbind.data.frame(DataAnalysis$shared_elements_grouped$UM, DataAnalysis$shared_elements_grouped$UMSR)
mydata$Transplant <- "Primary"
mydata$Transplant[grep(pattern = "2nd", x = mydata$Group)] <- "Secondary"
mydata$Transplant <- gsub(x = mydata$Transplant, pattern = "Primary", replacement = "1st")
mydata$Transplant <- gsub(x = mydata$Transplant, pattern = "Secondary", replacement = "2nd")
allbcs <- mydata$Barcode
colpalette <- colorRampPalette(RColorBrewer::brewer.pal(12, name = "Paired"))(length(unique(allbcs)))
png(filename = paste0(paperdata,"Combo_clonal_structure_1st_vs_2nd_alluvium_track.png"),
width = 6, height = 6, units = "in", res = 300)
print(ggplot(data = mydata,
aes(x = Transplant, y = mean_val, alluvium = Barcode, label = Barcode)) +
ylab("Mean abundance % - cumulative") +
geom_alluvium(aes(fill = Barcode),
alpha = .75, decreasing = FALSE, width = 1/4) +
geom_stratum(aes(stratum = Barcode, fill = Barcode), alpha = .50, decreasing = FALSE, width = 1/4) +
scale_fill_manual(values = alpha(colour = colpalette, alpha = 0.50)) +
facet_grid(. ~ Condition,scales = "free") +
ggtitle(label = "Shared BC tracking", subtitle = paste0("aggregation by group - mean (na.rm)")) +
theme(legend.position = "none",
axis.text = element_text(size = 20, color = "black"),
axis.title = element_blank(),
axis.text.y.left = element_text(size = 20, angle = -270, hjust = 0.5, color = "black"),
panel.background = element_rect(fill = "white"),
strip.text.x = element_text(size = 20),
axis.line = element_line(linetype = 1),
strip.background.x = element_rect(fill = "white"),
plot.subtitle = element_blank(),
plot.title = element_blank(),
))
dev.off()
SampleID Group Cutoff MaxN VCN Engraftment PctNGFR
ms01 DPlike 0.005 3 0.3164 52 23.8
ms02 DPlike 0.005 3 0.3096 47.82 18.87
ms03 DPlike 0.005 3 0.3579 44.54 29.45
ms04 DPlike 0.005 3 0.5771 27.97 29.82
ms05 DPlike 0.005 3 0.3527 34.71 18.85
ms06 DPlike 0.005 3 0.4915 35.2 28.92
ms09 EXU1 0.005 4 0.533 31.16 33.22
ms10 EXU1 0.005 4 0.268 24.41 18.49
ms11 EXU1 0.005 4 0.211 30.44 16.98
ms12 EXU1 0.005 4 0.26 32.4 23.76
ms13 EXU1 0.005 4 0.16 38.08 17.94
ms14 EXU1 0.005 4 0.263 62.23 28.24
ms15 EXU1 0.005 4 0.457 57.21 25.87
ms16 EXU1 0.005 4 0.174 79.3 17.35
ms17 EXU3 0.005 4 3.005 9.22 94.96
ms18 EXU3 0.005 4 1.256 7.16 40.59
ms19 EXU3 0.005 4 2.849 1.64 83.84
ms20 EXU3 0.005 4 2.086 3.19 46.81
ms21 EXU3 0.005 4 1.638 15.03 76.04
ms22 EXU3 0.005 4 0.339 13.35 24.1
ms23 EXU3 0.005 4 1.802 4.04 74.94
ms24 EXU3 0.005 4 0.252 47.76 14.87
ms25 EXU2 0.005 3 1.028 37.38 54.43
ms26 EXU2 0.005 3 0.807 68.93 49.12
ms27 EXU2 0.005 3 0.649 59.51 40.94
ms29 EXU2 0.005 3 0.664 35.69 41.24
ms30 EXU2 0.005 3 0.588 53.67 37.27
ms31 EXU2 0.005 3 0.623 39.7 36.04
ms32 EXU2 0.005 3 0.718 62.29 41.74
# Run Analysis RI (DP-like) and TI (EXU1-3) samples
library(vegan)
library(dplyr)
source(file = "../ChartingBCs_Light.R")
library(RColorBrewer)
library(ggplot2)
paperdata <- "PaperData/"
dir.create(path = paperdata)
rootdir <- "./"
bseqdir <- paste0(rootdir,"Input/")
samplesheets <- list(MinPct = paste0(bseqdir,"TD_Timing_samplesheet.txt"))
# SampleIDs / groups present in the paper
DPlike_Exp2022_05 <- c("ms01","ms02","ms03","ms04","ms05","ms06")
EXU1_Exp2022_05 = c("ms09","ms10","ms11","ms12","ms13","ms14","ms15","ms16")
EXU3_Exp2022_05 = c("ms17","ms18","ms19","ms20","ms22","ms23","ms24")
EXU2_Exp2022_05 = c("ms25","ms26","ms27","ms29","ms30","ms31","ms32")
SampleIDs <- list(
DPlike_Exp2022_05 = DPlike_Exp2022_05,
EXU1_Exp2022_05 = EXU1_Exp2022_05,
EXU2_Exp2022_05 = EXU2_Exp2022_05,
EXU3_Exp2022_05 = EXU3_Exp2022_05,
RITIs_Exp2022_05 = c(DPlike_Exp2022_05,EXU1_Exp2022_05,EXU2_Exp2022_05,EXU3_Exp2022_05)
)
#### Basic analysis ####
for(setting in c("MinPct")){
dir.create(path = setting, showWarnings = F)
if(setting == "Auto"){
custom.cut.param <- FALSE
}else{
custom.cut.param <- TRUE
}
if(setting == "MinCPM"){
MinlogCPMFilter.param <- TRUE
}else{
MinlogCPMFilter.param <- FALSE
}
for (SampleGroup in names(SampleIDs)) {
chartingBCs_light(bseq.fold = bseqdir, samplesheet = samplesheets[[setting]],
custom.cut = custom.cut.param, do.filter = T,
method = setting,
removeMaxN = TRUE,
MinlogCPMFilter = MinlogCPMFilter.param,
sampleids = SampleIDs[[SampleGroup]], ed = 2, mc = 3,
projectid = paste0(setting,"_",SampleGroup,"_MaxN_removal"),
outfolder = paste(setting,paste0(SampleGroup,"_MaxN_removal","_seqtk"),sep = "/"),
counts_vector = c(5,20,50,100,200,500,5000,20000),
pcts_vector = c(0.001,0.002,0.005,0.01,0.02,0.1,0.2,0.5,0.75,1,2))
}
}
MinPct_RITIs_Exp2022_05_MaxN_removal_objects <- readRDS("MinPct_RITIs_Exp2022_05_MaxN_removal_seqtk/MinPct_RITIs_Exp2022_05_MaxN_removal_objects.rds")
mylist <- list(Exp2022_05 = MinPct_RITIs_Exp2022_05_MaxN_removal_objects)
shared_RITIs_list <- list()
for(j in names(mylist)){
tmp.obj <- mylist[[j]]
for(comp in names(tmp.obj$sharingbygroup)){
else{
shared_RITIs_list[[paste0(j,"_",comp)]] <- rownames(tmp.obj$sharingbygroup[[comp]])
}
}
}
shared_RITIs <- list(Exp2022_05 = unique(unlist(shared_RITIs_list[grep(pattern = "Exp2022_05", value = T, x = names(shared_RITIs_list))])))
saveRDS(object = shared_RITIs, "PaperData/shared_RITIs_step01.rds")
library(dplyr)
shared_RITIs_mt <- list()
shared_RITIs_mt_pcts <- list()
for(j in names(mylist)){
tmp.obj <- mylist[[j]]
shared_RITIs_mt[[j]] <- tmp.obj$counts_abund_matrices$logCPM[shared_RITIs[[j]],c(DPlike_Exp2022_05,EXU1_Exp2022_05,EXU2_Exp2022_05,EXU3_Exp2022_05)]
shared_RITIs_mt[[j]] <- as.matrix(arrange_all(shared_RITIs_mt[[j]],desc))
shared_RITIs_mt_pcts[[j]] <- tmp.obj$counts_abund_matrices$pct_abund[shared_RITIs[[j]],c(DPlike_Exp2022_05,EXU1_Exp2022_05,EXU2_Exp2022_05,EXU3_Exp2022_05)]
shared_RITIs_mt_pcts[[j]] <- as.matrix(arrange_all(shared_RITIs_mt_pcts[[j]],desc))
}
# exclude BCs that are shared within group from the list of BCs to be re-assigned ####
within_objs <- list()
for(j in names(mylist)){
ids <- NULL
if(j == "Exp2022_05"){
ids <- c("DPlike_Exp2022_05","EXU1_Exp2022_05","EXU2_Exp2022_05","EXU3_Exp2022_05")
}
for(id in ids){
within_objs[[j]][[id]] <- readRDS(paste0("MinPct/",id,"_MaxN_removal_seqtk/MinPct_",id,"_MaxN_removal_objects.rds"))
tmpdf <- as.data.frame(within_objs[[j]][[id]]$aggregationbygroup_melt)
print(head(rownames(tmpdf)))
within_objs[[j]][[id]] <- rownames(tmpdf)[which(tmpdf[[id]] > 1)]
writeLines(text = within_objs[[j]][[id]] , con = paste0(paperdata,"/Shared_barcodes_within_",id,".txt"))
}
}
# the barcodes above (within_objs lists) should be discarded from collision rule - fetching their index ####
index_to_retain <- list()
ratio_max_to_2nd <- list()
reassignment_rule <- list()
mydf_weightedratio <- list()
means <- list()
for(j in names(mylist)){
index_to_retain[[j]] <- which(!rownames(shared_RITIs_mt_pcts[[j]]) %in% unique(unlist(within_objs[[j]])))
barcodes2exclude <- intersect(rownames(shared_RITIs_mt_pcts[[j]]), unique(unlist(within_objs[[j]])))
writeLines(text = barcodes2exclude,
con = paste0("PaperData/barcodes_shared_both_within_excluded_from_collision_",j,".txt"))
shared_RITIs_mt_pcts[[j]] <- shared_RITIs_mt_pcts[[j]][index_to_retain[[j]],]
shared_RITIs_mt[[j]] <- shared_RITIs_mt[[j]][index_to_retain[[j]],]
sds <- apply(shared_RITIs_mt_pcts[[j]], MARGIN = 1, FUN = function(x) {sd(x[x > 0])})
maxs <- apply(shared_RITIs_mt_pcts[[j]], MARGIN = 1, FUN = max)
means[[j]] <- apply(shared_RITIs_mt_pcts[[j]], MARGIN = 1, FUN = mean)
second <- apply(shared_RITIs_mt_pcts[[j]], MARGIN = 1, FUN = function(x) {sort(x, decreasing = T)[2]})
#reassignment_rule[[j]] <- ifelse((maxs - sds) > second, 1, 0)
#print(reassignment_rule[[j]])
ratio_max_to_2nd[[j]] <- apply(shared_RITIs_mt_pcts[[j]], MARGIN = 1, FUN = max) / apply(shared_RITIs_mt_pcts[[j]], MARGIN = 1, FUN = function(x) {sort(x, decreasing = T)[2]})
mydf_weightedratio[[j]] <- as.data.frame(ratio_max_to_2nd[[j]])
colnames(mydf_weightedratio[[j]]) <- "ratioMax2Second"
mydf_weightedratio[[j]]$weighted <- mydf_weightedratio[[j]]$ratioMax2Second * means[[j]]
mydf_weightedratio[[j]]$logged <- log10(mydf_weightedratio[[j]]$weighted)
reassignment_rule[[j]] <- ifelse(mydf_weightedratio[[j]]$logged > -1, 1, 0)
names(reassignment_rule[[j]]) <- rownames(mydf_weightedratio[[j]])
saveRDS(object = ratio_max_to_2nd[[j]], file = paste0("PaperData/ratio_max_to_2nd_pcts_",j,"_nofilt_within.rds"))
png(paste0("PaperData/",j,"_Baplot_ratio_max_2nd.png"), width = 9, height = 6, units = "in", res = 200)
barplot(log10(ratio_max_to_2nd[[j]][order(ratio_max_to_2nd[[j]], decreasing = T)]), xaxt='n',
xlab = "Shared barcodes",
main = "log10 ratio max value vs 2nd")
abline(h = 1, col = "red")
dev.off()
}
saveRDS(object = mydf_weightedratio, file = paste0(paperdata,"/mydf_weightedratio.rds"))
saveRDS(object = ratio_max_to_2nd, file = paste0(paperdata,"/ratio_max_to_2nd.rds"))
saveRDS(object = reassignment_rule, file = paste0(paperdata,"/reassignment_rule.rds"))
saveRDS(object = means, file = paste0(paperdata,"/means.rds"))
library(reshape2)
shared_RITIs_mt_melt <- list()
barcodes_to_reassign_A <- list()
barcodes_to_reassign_A_names <- list()
for(j in names(mylist)){
barcodes_to_reassign_A[[j]] <- names(ratio_max_to_2nd[[j]][ratio_max_to_2nd[[j]] > 10]) # filtering
barcodes_to_reassign_A_names[[j]] <- colnames(mylist[[j]]$counts_abund_matrices$pct_abund[barcodes_to_reassign_A[[j]],])[apply(X = mylist[[j]]$counts_abund_matrices$pct_abund[barcodes_to_reassign_A[[j]],], MARGIN = 1, FUN = function(x){which.max(x)})]
names(barcodes_to_reassign_A[[j]]) <- barcodes_to_reassign_A_names[[j]]
saveRDS(object = barcodes_to_reassign_A[[j]], file = paste0("PaperData/barcodes_to_reassign_A_",j,".rds"))
pheatmap::pheatmap(mat = shared_RITIs_mt[[j]], cluster_cols = F, cluster_rows = F,
scale = "none", filename = paste0(paperdata, "Shared_barcodes_RITIs_logCPM_before_reassignment_",j,".png"),
main = paste0("Shared barcodes (",nrow(shared_RITIs_mt[[j]]),") between TIs vs TIs vs RI (replicates)", "\n",
"logCPM counts"),
show_rownames = F,
width = 9, height = 6, color = rev(brewer.pal(n = 9, "Spectral")))
shared_RITIs_mt_melt[[j]] <- as.data.frame(melt(shared_RITIs_mt[[j]]))
png(filename = paste0(paperdata, "Shared_barcodes_RITIs_logCPM_ggplot_before_reassignment_",j,".png"),
width = 9, height = 6, units = "in", res = 300)
print(ggplot(data = shared_RITIs_mt_melt[[j]],
mapping = aes(x = Var2, y = Var1, fill = value)) +
geom_tile() + xlab("Samples") + ylab("Barcodes") +
# scale_x_discrete(guide = guide_axis(n.dodge=2)) +
ggtitle(paste0("Shared barcodes (",nrow(shared_RITIs_mt[[j]]),") between TIs vs TIs vs RI"), subtitle = "logCPM counts") +
theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5),
axis.text.y.left = element_blank(), axis.ticks.y.left = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5)) +
scale_fill_distiller(palette = "Spectral", direction = -1))
dev.off()
}
# Note: barcodes shared also within condition are removed from re-assignment ####
for(setting in c("MinPct")){
dir.create(path = setting, showWarnings = F)
if(setting == "Auto"){
custom.cut.param <- FALSE
}else{
custom.cut.param <- TRUE
}
if(setting == "MinCPM"){
MinlogCPMFilter.param <- TRUE
}else{
MinlogCPMFilter.param <- FALSE
}
for (SampleGroup in c("DPlike_Exp2022_05","EXU1_Exp2022_05","EXU2_Exp2022_05","EXU3_Exp2022_05","RITIs_Exp2022_05")) { # pickup only groups that contain 1st transplant samples
chartingBCs_light(bseq.fold = bseqdir, samplesheet = samplesheets[[setting]],
custom.cut = custom.cut.param, do.filter = T,
method = setting, bcodes2reassign = barcodes_to_reassign_A[["Exp2022_05"]],
removeMaxN = TRUE,
MinlogCPMFilter = MinlogCPMFilter.param,
sampleids = SampleIDs[[SampleGroup]], ed = 2, mc = 3,
projectid = paste0(setting,"_",SampleGroup,"_MaxN_removal_reassignedA"),
outfolder = paste(setting,paste0(SampleGroup,"_MaxN_removal","_seqtk_reassignA"),sep = "/"),
counts_vector = c(5,20,50,100,200,500,5000,20000),
pcts_vector = c(0.001,0.002,0.005,0.01,0.02,0.1,0.2,0.5,0.75,1,2))
}
}
#### sharing intra condition #####
library(openxlsx)
library(ggplot2)
library(ggrepel)
# load input for each batch ####
processed_data_A <- list(Exp2022_05 = list())
for(batch in names(processed_data_A)){
processed_data_A[[batch]][["DPlike"]] <- readRDS(paste0("MinPct/DPlike_Exp2022_05_MaxN_removal_seqtk_reassignA/MinPct_DPlike_Exp2022_05_MaxN_removal_reassignedA_objects.rds"))
processed_data_A[[batch]][["EXU1"]] <- readRDS(paste0("MinPct/EXU1_Exp2022_05_MaxN_removal_seqtk_reassignA/MinPct_EXU1_Exp2022_05_MaxN_removal_reassignedA_objects.rds"))
processed_data_A[[batch]][["EXU2"]] <- readRDS(paste0("MinPct/EXU2_Exp2022_05_MaxN_removal_seqtk_reassignA/MinPct_EXU2_Exp2022_05_MaxN_removal_reassignedA_objects.rds"))
processed_data_A[[batch]][["EXU3"]] <- readRDS(paste0("MinPct/EXU3_Exp2022_05_MaxN_removal_seqtk_reassignA/MinPct_EXU3_Exp2022_05_MaxN_removal_reassignedA_objects.rds"))
}
# Sharing intra group
sharing_intra_A <- list(Exp2022_05 = list()) # strategy A - final choice
for(batch in names(sharing_intra_A)){
for(k in c("DPlike","EXU1","EXU2","EXU3")){
sharing_intra_A[[batch]][[k]] <- read.table(file = paste0("MinPct/",k,"_",batch,"_MaxN_removal_seqtk_reassignA/Tables/Sharing_MinPct_",k,"_",batch,"_MaxN_removal_reassignedA.txt"),
nrow = length(SampleIDs[[paste0(k,"_",batch)]]))
}
}
### Plotting intragroup sharing boxplots #####
for(j in names(sharing_intra_A)){
for(d in names(sharing_intra_A[[j]])){
# A type reassignment
sharing_intra_A[[j]][[d]] <- subset(sharing_intra_A[[j]][[d]], select = c("V2","V5"))
sharing_intra_A[[j]][[d]]$Group <- d
colnames(sharing_intra_A[[j]][[d]]) <- c("SampleID","pct.share_1vsAll","Group")
sharing_intra_A[[j]][[d]]$pct.share_1vsAll <- gsub(x = sharing_intra_A[[j]][[d]]$pct.share_1vsAll, pattern = "%", replacement = "")
}
}
colorvector <- c("#D6D6D6","#EA4125","#DCFA8A","#194375")
names(colorvector) <- c("DPlike","EXU1","EXU2","EXU3")
## Plotting
for(j in names(sharing_intra_A)){
df <- rbind(sharing_intra_A[[j]][["DPlike"]], sharing_intra_A[[j]][["EXU1"]])
df <- rbind(df, sharing_intra_A[[j]][["EXU2"]])
df <- rbind(df, sharing_intra_A[[j]][["EXU3"]])
df$pct.share_1vsAll <- as.numeric(df$pct.share_1vsAll)
df$Group <- factor(df$Group, levels = c("DPlike", "EXU1", "EXU2","EXU3"))
write.xlsx(df, file = paste0(paperdata,"Pct_sharing_1_vs_All_intragroup_",j,"_RITIs_reassignmentA.xlsx"), asTable = T)
png(paste0(paperdata,"Pct_sharing_1_vs_All_intragroup_",j,"_RITIs_reassignmentA.png"), width = 6, height = 6, units = "in", res = 300)
print(ggplot(df, mapping = aes(x = Group, y = pct.share_1vsAll, color = Group, label = SampleID)) +
geom_boxplot(outlier.shape = NA) + geom_jitter(position = position_jitter(seed = 6), show.legend = F) +
#geom_text_repel(position = position_jitter(seed = 6), show.legend = F) +
theme_bw() + ylab("% sharing") +
ggtitle(label = "Barcoding sharing intra-group", subtitle = paste0( j, "- One vs All - reassign A")) +
theme(axis.title.x = element_blank(),
legend.position = c(0.15, 0.8),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)))
dev.off()
}
## fix colors and labels for pubblication ####
intra_batch_exp2022_05 <- read.xlsx(paste0(paperdata,"Pct_sharing_1_vs_All_intragroup_Exp2022_05_RITIs_reassignmentA.xlsx"))
# Shannons
shannons_list_A <- list()
for(batch in names(processed_data_A)){
for(k in names(processed_data_A[[batch]])){
shannon <- diversity(processed_data_A[[batch]][[k]]$counts_abund_matrices$Raw, index = "shannon", MARGIN = 2)
shannon_df <- as.data.frame(shannon)
colnames(shannon_df) <- "Shannon"
shannons_list_A[[batch]][[k]] <- shannon_df
}
}
shannon_Exp2022_05_A <- rbind(shannons_list_A$Exp2022_05$DPlike, shannons_list_A$Exp2022_05$EXU1)
shannon_Exp2022_05_A <- rbind(shannon_Exp2022_05_A, shannons_list_A$Exp2022_05$EXU2)
shannon_Exp2022_05_A <- rbind(shannon_Exp2022_05_A, shannons_list_A$Exp2022_05$EXU3)
saveRDS(shannon_Exp2022_05_A, paste0(paperdata,"Shannons.rds"))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment