library(RColorBrewer)
library(DESeq2)
library(pheatmap)
library(ggplot2)
library(gplots)
library(ggrepel)
library(dbplyr)

# Metadata definition
cellcompartment <- c("HSC", "HSC", "MPP", "MPP", "MEP", "MEP", "CMP", "CMP", "GMP", "GMP",
                   "HSC", "HSC", "MPP", "MPP", "MEP", "MEP", "CMP", "CMP", "GMP", "GMP")
condition <- c("THAL", "THAL", "THAL", "THAL", "THAL","THAL","THAL","THAL","THAL","THAL",
               "HD","HD","HD","HD","HD","HD","HD","HD","HD","HD")
expgroup <- c("HSC_THAL", "HSC_THAL", "MPP_THAL", "MPP_THAL", "MEP_THAL", "MEP_THAL", "CMP_THAL", "CMP_THAL", "GMP_THAL", "GMP_THAL",
              "HSC_HD", "HSC_HD", "MPP_HD", "MPP_HD", "MEP_HD", "MEP_HD", "CMP_HD", "CMP_HD", "GMP_HD", "GMP_HD")

myCompartment <- data.frame(cellcompartment, condition, expgroup)

myCompartment$expgroup <- as.factor(myCompartment$expgroup)

myDataframe <- read.table(file = "Fig1D_input.txt", header = TRUE, stringsAsFactors = FALSE, row.names = 1)
myMatrix <- myDataframe[ , 6:ncol(myDataframe)]
colnames(myMatrix) <- c(paste0("S", 1:20))
myMatrix <- as.matrix(myMatrix)

#make coldesign
myGroup <- paste(myCompartment$cellcompartment, myCompartment$condition, sep = "_")
myCompartment$expgroup <- myGroup

#DESeq2 object
dds <- DESeqDataSetFromMatrix(countData = myMatrix,
                              colData = myCompartment,
                              design = ~ expgroup)
#pre-filtering 
dds <- dds[rowSums(counts(dds))>0, ]

#normalization and dispersion estimation
dds <- DESeq(object = dds, parallel = T)

#r-log transformation for visualization and performing
rld <- rlogTransformation(dds)

#PCA analysis

pcagenes <- 500
#dataPCA <- plotPCA(rld, intgroup = c("cellcompartment", "condition"), returnData=TRUE, 
#                   ntop=pcagenes)
dataPCA <- plotPCA(rld, intgroup = "expgroup", returnData=TRUE, ntop=pcagenes)
percentVar <- round(100 * attr(dataPCA, "percentVar"))
png(filename = "Fig1D.png",width = 6, height = 5, units = "in", res = 300)
ggplot(dataPCA, aes(PC1, PC2, fill= group, label = name)) + 
  geom_point(size=2.5, shape = 21, colour= "black") +
          scale_fill_brewer(palette="Paired") +
          # scale_colour_brewer(palette="Paired") + 
          geom_text_repel() +
          xlab(label = paste0("PC1: ",percentVar[1],"% variance")) +
          ylab(label = paste0("PC2: ",percentVar[2],"% variance")) +
          ggtitle(label = "Principal Component Analysis", paste("Top 500 most variable genes - DESeq2")) +
          theme_bw() +
          theme(plot.title = element_text(size = 12, face = "bold", hjust = 0.5), 
                plot.subtitle=element_text(size=8, hjust=0.5, face="italic", color="black"),
                axis.title = element_text(size=12, hjust=0.5, face="bold", color="black"),
                legend.text = element_text(size=10, color="black"),
                legend.title = element_blank(),
                axis.text = element_text(size=12, hjust=0.5, color="black"))
dev.off()

