
#' Bulk CpG methylation analysis of bismark coverage files with methylKit
#'
#' Reads one `<SampleID>.bismark.cov` file per sample listed in an Excel
#' sample sheet, optionally restricts the data to a genomic window, filters
#' and normalises coverage, writes QC plots (per-sample statistics,
#' correlation, clustering, PCA), computes per-CpG differential methylation
#' and draws an annotated heatmap of percent methylation.
#'
#' @param methcall.folder Existing folder containing the `.bismark.cov` files.
#' @param outfolder Output folder (created if it does not exist).
#' @param proj.id Project id used as prefix of the output files.
#' @param samplesheet Path to the Excel sample sheet.
#' @param sheetnum Sheet of `samplesheet` to read.
#' @param design.var Sample-sheet column defining the two groups.
#' @param case.var Value of `design.var` coded as 1 in the treatment vector.
#' @param control.var Value of `design.var` coded as 0 in the treatment vector.
#' @param idcol Sample-sheet column holding the sample ids.
#' @param mincoverage Minimum coverage passed to `methRead()` (`mincov`).
#' @param lowperc,lowcount Passed to `filterByCoverage()` as `lo.perc` and
#'   `lo.count`.
#' @param assem Genome assembly label passed to `methRead()`.
#' @param do.subset If `TRUE`, keep only CpGs overlapping the window given by
#'   `chr.subset`, `start.subset` and `end.subset`.
#' @param chr.subset,start.subset,end.subset Window used when `do.subset` is
#'   `TRUE`.
#' @param pipeline.meth `pipeline` argument of `methRead()`.
#' @param plot.covariates Sample-sheet columns used to colour the PCA plots
#'   (one plot per column).
#' @param idstoremove Optional sample ids to drop before the analysis.
#' @param delta.meth CpGs whose absolute methylation difference is below this
#'   value are labelled "ns" regardless of their q-value.
#' @param plot.categorical.vars,plot.continuous.vars Sample-sheet columns shown
#'   as row annotations of the heatmap.
#' @param plot.height,plot.width Heatmap PDF size.
#' @param cellw,cellh Heatmap cell width and height.
#' @param fontnumsize,fontsize Font sizes for heatmap numbers and labels.
#' @param gaps.col Column indices after which the heatmap shows a gap; indices
#'   larger than the number of CpG columns are ignored.
#'
#' @return `invisible(NULL)`; the function is called for the files it writes.
MethylAnalysis_bulk <- function(methcall.folder = NULL,
                                outfolder = NULL,
                                proj.id = "myproject",
                                samplesheet = NULL,
                                sheetnum = 1,
                                design.var = "Disease",
                                case.var = "Case",
                                control.var = "Control",
                                idcol = "SampleID",
                                mincoverage = 10,
                                lowperc = 5,
                                lowcount = 10,
                                assem = "hg38",
                                do.subset = TRUE,
                                chr.subset = "chr9",
                                start.subset = 136668000,
                                end.subset = 136671000,
                                pipeline.meth = "bismarkCoverage",
                                plot.covariates = c("Condition", "Batch", "MRD",
                                                    "Torelapse", "Chemorefractory",
                                                    "Sex", "CytoA", "Tissue"),
                                idstoremove = NULL,
                                delta.meth = 10,
                                plot.categorical.vars = c("Condition", "Batch",
                                                          "MRD", "Torelapse",
                                                          "Chemorefractory",
                                                          "Sex", "CytoA"),
                                plot.continuous.vars = c("miR-126", "egfl7"),
                                plot.height = 9,
                                plot.width = 12,
                                cellw = 15,
                                cellh = 15,
                                fontnumsize = 5,
                                fontsize = 8,
                                gaps.col = 8) {

  # ---- Argument checks (cheap, so they run before any package is loaded) ----

  if (is.null(methcall.folder)) {
    stop("Please set methcall.folder")
  }

  if (is.null(outfolder)) {
    stop("Please set outfolder")
  }

  if (is.null(samplesheet)) {
    stop("Please set samplesheet")
  }

  # The input folder must already exist: silently creating an empty one only
  # postpones the failure to the point where the coverage files are read.
  if (!dir.exists(methcall.folder)) {
    stop("methcall.folder does not exist: ", methcall.folder)
  }

  if (!file.exists(samplesheet)) {
    stop("samplesheet does not exist: ", samplesheet)
  }

  # ---- Libraries ----

  # Packages used below: fail immediately if one of them is missing.
  library(methylKit)
  library(ggplot2)
  library(ggrepel)
  library(GenomicRanges)
  library(openxlsx)
  library(pheatmap)
  library(RColorBrewer)

  # reshape2 and plyr are not used by this function; they are still attached
  # when available because earlier versions of the function attached them.
  for (optional_pkg in c("reshape2", "plyr")) {
    if (requireNamespace(optional_pkg, quietly = TRUE)) {
      library(optional_pkg, character.only = TRUE)
    }
  }

  # ---- Local helpers ----

  # Evaluate `expr` with a 9x6 inch PNG device open on `path`; the device is
  # closed even when `expr` fails.
  save_png <- function(path, expr) {
    png(filename = path, width = 9, height = 6, units = "in", res = 96)
    on.exit(dev.off(), add = TRUE)
    force(expr)
    invisible(NULL)
  }

  # Keep only the variables that are columns of the sample sheet, warning
  # about the ones that are dropped.
  keep_known_columns <- function(vars, known, what) {
    unknown <- setdiff(vars, known)
    if (length(unknown) > 0) {
      warning("Ignoring ", what, " not found in the sample sheet: ",
              paste(unknown, collapse = ", "), call. = FALSE)
    }
    unique(vars[vars %in% known])
  }

  # ---- Folders ----

  infolder <- paste0(methcall.folder, "/")

  outfolder <- paste0(outfolder, "/")
  dir.create(outfolder, showWarnings = FALSE, recursive = TRUE)

  qcfolder <- paste0(outfolder, "QC/")
  dir.create(qcfolder, showWarnings = FALSE, recursive = TRUE)

  # ---- Sample sheet ----

  ssheet <- read.xlsx(samplesheet, sheet = sheetnum)

  missing_cols <- setdiff(c(idcol, design.var), colnames(ssheet))
  if (length(missing_cols) > 0) {
    stop("Column(s) not found in the sample sheet: ",
         paste(missing_cols, collapse = ", "))
  }

  if (!is.null(idstoremove)) {
    ssheet <- ssheet[!ssheet[[idcol]] %in% idstoremove, , drop = FALSE]
  }

  # Keep only the samples belonging to the two groups being compared.
  ssheet <- ssheet[ssheet[[design.var]] %in% c(case.var, control.var), ,
                   drop = FALSE]

  if (nrow(ssheet) == 0) {
    stop("No samples left after filtering on '", design.var, "'")
  }

  # Ids are handled as character so that numeric ids are used as names and
  # never as positions when indexing lists.
  ids <- as.character(ssheet[[idcol]])

  # Treatment vector: 1 = case, 0 = control.
  d.vector <- ifelse(ssheet[[design.var]] == case.var, 1, 0)

  plot.covariates <- keep_known_columns(plot.covariates, colnames(ssheet),
                                        "PCA covariate(s)")
  cat_vars <- keep_known_columns(plot.categorical.vars, colnames(ssheet),
                                 "categorical annotation(s)")
  con_vars <- keep_known_columns(plot.continuous.vars, colnames(ssheet),
                                 "continuous annotation(s)")

  # ---- Coverage files ----

  covs <- as.list(paste0(infolder, ids, ".bismark.cov"))
  names(covs) <- ids
  sampleids <- as.list(ids)
  names(sampleids) <- ids

  cov_missing <- !file.exists(unlist(covs))
  if (any(cov_missing)) {
    stop("Coverage file(s) not found: ",
         paste(unlist(covs)[cov_missing], collapse = ", "))
  }

  # NOTE: the fixed-name .rds snapshots written to the working directory in
  # this function are debugging dumps kept for backward compatibility.
  saveRDS(object = covs, file = "covs_object.rds")

  # ---- methylKit object ----

  myobj <- methRead(location = covs,
                    sample.id = sampleids,
                    assembly = assem,
                    pipeline = pipeline.meth,
                    treatment = d.vector,
                    context = "CpG",
                    mincov = mincoverage)

  saveRDS(myobj, file = "Myobject.rds")

  names(myobj) <- ids

  saveRDS(myobj, file = "Myobject_with_names.rds")

  # Optional restriction to a genomic window.
  if (isTRUE(do.subset)) {
    my.win <- GRanges(seqnames = chr.subset,
                      ranges = IRanges(start = start.subset, end = end.subset))
    myobj <- selectByOverlap(myobj, my.win)
  }

  saveRDS(myobj, file = "Myobject_with_names_aftersubset.rds")

  # Coverage filtering and normalisation.
  myobj <- filterByCoverage(methylObj = myobj, lo.count = lowcount,
                            lo.perc = lowperc)
  myobj <- normalizeCoverage(obj = myobj)

  # ---- Per-sample QC plots ----

  metricsfolder <- paste0(qcfolder, "Metrics/")
  dir.create(path = metricsfolder, showWarnings = FALSE, recursive = TRUE)

  # Samples are addressed by position: the object keeps the sample-sheet
  # order, so this does not depend on list names surviving the steps above.
  for (k in seq_along(ids)) {
    save_png(
      paste0(metricsfolder, proj.id, "_CpG_pct_methylation_sample_",
             ids[k], ".png"),
      print(getMethylationStats(myobj[[k]], plot = TRUE, both.strands = FALSE))
    )
    save_png(
      paste0(metricsfolder, proj.id, "_Coverage_stats_sample_", ids[k], ".png"),
      print(getCoverageStats(myobj[[k]], plot = TRUE, both.strands = FALSE))
    )
  }

  # ---- United object, correlation and clustering ----

  meth <- unite(object = myobj, destrand = FALSE)
  saveRDS(meth, "savemeth.tmp.rds")

  # Capture the printed correlation matrix; the sink is always released.
  sink(paste0(qcfolder, proj.id, "_Correlations.txt"))
  tryCatch(getCorrelation(meth, plot = FALSE), finally = sink())

  # The pairwise scatter plot is only drawn for fewer than 15 samples.
  if (length(ids) < 15) {
    save_png(paste0(qcfolder, proj.id, "_Correlations_pearson_pairwise.png"),
             print(getCorrelation(meth, plot = TRUE)))
  }

  save_png(paste0(qcfolder, proj.id, "_Clustering.png"),
           clusterSamples(meth, dist = "euclidean", plot = TRUE,
                          method = "ward.D2"))

  # ---- PCA (custom charts, one per covariate) ----

  pca_compt <- PCASamples(meth, obj.return = TRUE, screeplot = FALSE)

  pcafolder <- paste0(qcfolder, "PCA/")
  dir.create(path = pcafolder, showWarnings = FALSE, recursive = TRUE)

  pca_df <- as.data.frame(x = pca_compt$x[, 1:2])

  for (covariate in plot.covariates) {
    pca_df$condition <- as.factor(ssheet[[covariate]])
    pca_plot <- ggplot(data = pca_df,
                       mapping = aes(x = PC1, y = PC2, col = condition,
                                     label = rownames(pca_df))) +
      geom_point(size = 3) +
      geom_text_repel(size = 3) +
      ggtitle(label = "Principal component analysis", subtitle = covariate) +
      theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5)) +
      theme(plot.subtitle = element_text(size = 12, hjust = 0.5,
                                         face = "italic", color = "black")) +
      theme(axis.title = element_text(size = 12, hjust = 0.5, face = "bold",
                                      color = "black")) +
      theme(legend.text = element_text(size = 8, hjust = 0.5)) +
      theme(legend.title = element_blank()) +
      theme(axis.text = element_text(size = 12, hjust = 0.5, color = "black"))
    save_png(paste0(pcafolder, proj.id, "_PCA_", covariate, ".png"),
             print(pca_plot))
  }

  # ---- Percent methylation ----

  perc.meth <- percMethylation(meth)

  saveRDS(perc.meth, "pctmethly.rds")

  base::rownames(perc.meth) <- paste0(meth$chr, "_", meth$start)

  # ---- Differential methylation ----

  myDiff <- calculateDiffMeth(meth)

  # The result is written and read back to obtain a plain data frame; the
  # file is rewritten below once the extra columns have been added.
  diff_file <- paste0(outfolder, proj.id, "_DiffMeth_single_CpG.txt")
  write.table(myDiff, diff_file, row.names = FALSE)
  difftest <- read.table(diff_file, header = TRUE)

  difftest$comparison <- proj.id
  difftest$qvalue_r <- as.character(cut(x = difftest$qvalue,
                                        breaks = c(-1, 1e-100, 1e-10, 1e-02, 1),
                                        labels = c("***", "**", "*", "ns")))

  # Differences smaller than delta.meth are reported as not significant; the
  # stored difference is the absolute value.
  below_delta <- abs(difftest$meth.diff) < delta.meth
  difftest$meth.diff <- abs(difftest$meth.diff)
  difftest$qvalue_r[below_delta] <- "ns"
  write.table(difftest, diff_file, row.names = FALSE)

  # ---- Heatmap annotations and colours ----

  color_list <- list()
  annrows <- NULL

  # Column annotations are keyed by CpG start position, so start positions
  # are expected to be unique (e.g. a single-chromosome window).
  anncols <- difftest[, c("qvalue_r", "meth.diff"), drop = FALSE]
  base::rownames(anncols) <- difftest$start

  if (length(cat_vars) > 0 || length(con_vars) > 0) {
    rownames(ssheet) <- ids
    annrows <- ssheet[, c(cat_vars, con_vars), drop = FALSE]

    # Continuous variables: light grey to one Set1 colour (recycled if there
    # are more variables than colours).
    concolors <- RColorBrewer::brewer.pal(n = 9, name = "Set1")
    for (k in seq_along(con_vars)) {
      top_color <- concolors[(k - 1L) %% length(concolors) + 1L]
      color_list[[con_vars[k]]] <- colorRampPalette(c("lightgrey",
                                                      top_color))(10)
    }

    # Categorical variables: one Set3 colour per sorted level, interpolated
    # when there are more than 9 levels.
    for (cat_var in cat_vars) {
      lvls <- sort(unique(ssheet[[cat_var]]))
      if (length(lvls) == 0) {
        next
      }
      catcolors <- RColorBrewer::brewer.pal(n = 9, name = "Set3")
      if (length(lvls) > 9) {
        catcolors <- colorRampPalette(catcolors)(length(lvls))
      }
      color_list[[cat_var]] <- setNames(catcolors[seq_along(lvls)],
                                        as.character(lvls))
    }
  }

  color_list[["qvalue_r"]] <- c("*" = "#6497b1", "**" = "#03396c",
                                "***" = "#011f4b", ns = "#c4cacf")
  # "Reds" provides at most 9 colours.
  color_list[["meth.diff"]] <- colorRampPalette(brewer.pal(n = 9, "Reds"))(100)
  # Dedicated palette for miR-126, only when it is actually annotated.
  if ("miR-126" %in% con_vars) {
    color_list[["miR-126"]] <- brewer.pal(n = 9, "PuRd")
  }

  # ---- Heatmap ----

  pctmeth_matrix <- t(perc.meth)
  # Label columns by start position so they match rownames(anncols) whatever
  # the chromosome name looks like (chr9, chrX, ...).
  base::colnames(pctmeth_matrix) <- as.character(meth$start)

  # Gaps beyond the last column would make pheatmap fail.
  heat_gaps <- gaps.col[gaps.col <= ncol(pctmeth_matrix)]
  if (length(heat_gaps) == 0) {
    heat_gaps <- NULL
  }

  pheatmap(mat = pctmeth_matrix,
           main = gsub(x = proj.id, pattern = "_", replacement = " "),
           filename = paste0(outfolder, proj.id,
                             "_CpG_percent_methylation_matrix_pheatmap.pdf"),
           width = plot.width,
           height = plot.height,
           na_col = "black",
           cluster_cols = FALSE,
           cluster_rows = TRUE,
           annotation_row = annrows,
           cellwidth = cellw,
           cellheight = cellh,
           display_numbers = TRUE,
           fontsize = fontsize,
           fontsize_number = fontnumsize,
           number_format = "%.0f",
           annotation_col = anncols,
           gaps_col = heat_gaps,
           annotation_colors = color_list,
           color = c("#F5F5F5", "#EEEEEE", "#CCCCCC", "#999999", "#666666",
                     "#333333", "#000000"),
           breaks = c(0, 10, 20, 30, 50, 70, 90, 100))

  # ---- Outputs ----

  saveRDS(object = pctmeth_matrix,
          paste0(outfolder, proj.id,
                 "_CpG_percent_methylation_matrix_pheatmap.rds"))
  saveRDS(object = list(anncol = anncols, annrow = annrows,
                        anncolors = color_list),
          paste0(outfolder, proj.id, "_annotations_matrix_pheatmap.rds"))
  saveRDS(object = difftest,
          paste0(outfolder, proj.id, "_CpG_differential_methylation.rds"))
  saveRDS(object = perc.meth,
          paste0(outfolder, proj.id, "_CpG_percent_methylation.rds"))

  write.table(x = perc.meth,
              file = paste0(outfolder, proj.id, "_CpG_percent_methylation.txt"))
  write.table(x = difftest,
              file = paste0(outfolder, proj.id,
                            "_CpG_differential_methylation.txt"))

  # Per-sample object and united object go to their own files.
  saveRDS(myobj, file = paste0(outfolder, proj.id, "_methylkit.rds"))
  saveRDS(meth, file = paste0(outfolder, proj.id, "_methylkit_meth.rds"))

  invisible(NULL)
}