I updated the scripts

cad0b7de · Marco Monti · a78a5561 · cad0b7de · cad0b7de · cad0b7de
Commit cad0b7de authored Mar 25, 2025 by Marco Monti
--- a/CB2025_figure_1_RNAseq.R
+++ b/CB2025_figure_1_RNAseq.R
@@ -2,35 +2,56 @@ library("readxl")
 library("dplyr")

 #####Directories#####
-us <- "/Users/Squadrito/"
-us <-"C:/Users/bresesti.chiara/"
-wdir1509<-paste0(us,"/Dropbox (HSR Global)/SquadritoM_1509_RNASeq_QIAseq_UPX/QIAseqUltraplexRNA_181342/primary_analysis")
-wdir1510<-paste0(us,"/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX")
-fdir <- paste0(us,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
+# us <-"C:/Users/bresesti.chiara/"
+# wdir1509<-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1509_RNASeq_QIAseq_UPX/QIAseqUltraplexRNA_181342/primary_analysis")
+# wdir1510<-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX")
+# fdir <- paste0(us, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
+# 
+# input_d <-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1509_RNASeq_QIAseq_UPX/QIAseqUltraplexRNA_181342/primary_analysis")
+# wdir1510 <-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX")
+# output_d <- paste0(us, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
+
+input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference"
+output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
+
+################################################################################
+# R script to generate data and figures for manuscript figures 1B, 1C, and 1D.
+#
+# This script reads RNA-seq and miRNA-seq data from Excel files, performs
+# normalization and data transformation, and generates output tables and plots.
+#
+# Figures generated/data produced:
+#   - Figure 1B: Heatmap data for selected marker genes (Excel table)
+#   - Figure 1C: Pairwise MA plots for miRNA data
+#   - Figure 1D: Heatmap data for selected miRNA families (Excel table)
+#
+# Input data files:
+#   - Figure 1B: "QIAseqUltraplexRNA_181342.xlsx" (Sheet 3: umis.genes.polyA-mouse)
+#   - Figure 1C & 1D: "173308.all_samples.summary.xlsx" (Sheet 2: miRNA_piRNA)
+#   - Figure 1D: "miRNA_Family.xlsx" (Sheet 1)
+#
+################################################################################


 #### Figure 1B ####
-setwd(wdir1509)
-df1 <- as.data.frame(read_excel("QIAseqUltraplexRNA_181342.xlsx",sheet=3,col_names = T,skip = 1))#sheet: umis.genes.polyA-mouse
-df1 <-df1[,-c(1,3:6)] #keep only gene name and UMI counts
-df1[,-1] <-apply(df1[,-1],2,function(x){x/sum(x)*1000000}) #UMI normalized by CPM
+df1 <- as.data.frame(read_excel(paste0(input_dir, "/QIAseqUltraplexRNA_181342.xlsx"), sheet=3, col_names = T, skip=1)) #sheet: umis.genes.polyA-mouse
+df1 <-df1[,-c(1,3:6)] # keep only gene name and UMI counts
+df1[,-1] <-apply(df1[,-1],2,function(x){x/sum(x)*1000000}) # UMI normalized by CPM
 gene_vector <- c("Cd19", "Ms4a1", "Fcer2a", "Ighm", "Cd8a", "Xcr1", "Itgae", "Itgax", 
                 "Ccr2", "Itgam", "Mgl2", "Cd68", "Vcam1", "Csf1r", "Adgre1", 
                 "Siglec1", "Hmox1", "Timd4", "Vsig4", "Clec4f", "Marco", "Pecam1", 
                 "Tek", "Lyve1", "Stab2")
 df2 <- df1[df1$gene%in%gene_vector,] %>% arrange(factor(gene, levels=gene_vector))
 rownames(df2) <- df2[,1]
-df2.scaled <- as.data.frame(t(scale(t(df2[-1])))) #Zscore normalization. Scaled only works on columns, so need to transform
-setwd(fdir)
-openxlsx::write.xlsx(df2.scaled,"Figure_1B_table.xlsx", rowNames=T)
-#the df was exported and used to create a heatmap using Graphpad
+df2.scaled <- as.data.frame(t(scale(t(df2[-1])))) # Zscore normalization. Scaled only works on columns, so need to transform
+openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1B_table.xlsx"), rowNames=T)
+# the df was exported and used to create a heatmap using Graphpad

 #### Figure 1C ####
 # "edgeR_DGE_res_volcano.pdf" (in the 'wdir1509' folder) was imported in illustrator 
 # and merged with the plot generated by the code below
-setwd(wdir1510)
-df1 <- as.data.frame(read_excel("173308.all_samples.summary.xlsx",sheet=2,col_names = T))#sheet: miRNA_piRNA
-df1 <-df1[,1:7]#UMI 
+df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) #sheet: miRNA_piRNA
+df1 <-df1[,1:7] #UMI 
 rownames(df1)<-df1$miRNA
 df2 <- apply(df1[,-1], 2, function(x) log(x)) #Log counts
 df2 <- as.data.frame(limma::normalizeCyclicLoess(df2, weights = NULL, span=0.7, iterations = 5, method = "pairs")) #Cyclic loess normalization
@@ -41,17 +62,16 @@ upper.panel<-function(x, y, ...){
  points(((x+y)/2)[above],(x-y)[above], col="red", cex=0.6, pch=19)
  below <- (x-y+1)*((x+y)/2-5) < -5 & ((x+y)/2)>5
  points(((x+y)/2)[below],(x-y)[below], col="blue", cex=0.6, pch=19)
-} #function for MA plot
+} # function for MA plot
 pairs(df2[,1:6], lower.panel = NULL, upper.panel = upper.panel, 
-      ylim=c(-8.5,8.5), xlim=c(2,14), cex.labels = 2) #pairwise plot
+      ylim=c(-8.5,8.5), xlim=c(2,14), cex.labels = 2) # pairwise plot

 #### Figure 1D ####
-setwd(wdir1510)
-df1 <- as.data.frame(read_excel("173308.all_samples.summary.xlsx",sheet=2,col_names = T))#sheet: miRNA_piRNA
-df1 <- df1[-which(grepl("piR",df1[,1])),1:7]#UMI and piRNA removing
-df1[,1] <- gsub("/.*","",df1[,1]) #leave only first miRNA for ambiguous entries
-df1[,-1] <- apply(df1[,-1],2,function(x){x/sum(x,na.rm=T)*1000000})#UMI normalized by CPM
-df.families <- as.data.frame(read_excel("Analysis CB/miRNA Family.xlsx",sheet=1,col_names = T))#miRNA families from miRBase
+df1 <- as.data.frame(read_excel(paste0(input_dir, "/173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) #sheet: miRNA_piRNA
+df1 <- df1[-which(grepl("piR",df1[,1])),1:7] # UMI and piRNA removing
+df1[,1] <- gsub("/.*","",df1[,1]) # leave only first miRNA for ambiguous entries
+df1[,-1] <- apply(df1[,-1],2,function(x){x/sum(x,na.rm=T)*1000000}) #UMI normalized by CPM
+df.families <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_Family.xlsx"), sheet=1, col_names = TRUE)) # miRNA families from miRBase; read from input_dir because the script no longer calls setwd()
 df.families <- df.families[df.families[,3]==10090,c(4,1)] #select mouse entries
 df.families[,1] <- sub(pattern = "p.*",replacement ="p",x = df.families[,1])
 df2 <- merge(df.families,df1,by=1)
@@ -64,6 +84,5 @@ gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.
 df3 <- subset(df2, df2$`miR family`%in% gene_vector) %>% arrange(factor(`miR family`, levels=gene_vector))
 rownames(df3) <- df3[,1]
 df3.scaled <- as.data.frame(t(scale(t(df3[-1])))) #Zscore normalization. Scaled only works on columns, so need to transform
-setwd(fdir)
-openxlsx::write.xlsx(df2.scaled,"Figure_1D_table.xlsx", rowNames=T)
+openxlsx::write.xlsx(df3.scaled, paste0(output_dir, "/Figure_1D_table.xlsx"), rowNames=TRUE) # export the miRNA-family z-scores (df3.scaled), not the Figure 1B table (df2.scaled)
 #the df was exported and used to create a heatmap using Graphpad
--- a/CB2025_figure_3_RNAseq.R
+++ b/CB2025_figure_3_RNAseq.R
@@ -7,18 +7,46 @@ library(clusterProfiler)
 library(enrichplot)

 ##### Directories #####
-#us <- "/Users/Squadrito/"
-us <-"C:/Users/bresesti.chiara/"
-wdir<-paste0(us,"/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/05-DGE-NoOut-Corr")
-wdir_CB<-paste0(us, "/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/Analysis CB_v2")
-fdir <- paste0(us,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
+# us <-"C:/Users/bresesti.chiara/"
+# wdir<-paste0(us,"/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/05-DGE-NoOut-Corr")
+# wdir_CB<-paste0(us, "/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/Analysis CB_v2")
+# fdir <- paste0(us,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
+
+input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference"
+output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
+
+################################################################################
+# 
+# R script to generate data and figures for manuscript figures 3A, 3B, 3C, and 3D.
+# This script performs differential gene expression analysis and gene set enrichment
+# analysis to generate volcano plots, empirical cumulative distribution function (ECDF) plots, 
+# dot plots, and enrichment maps.
+#
+# Figures generated:
+# - Figure 3A & 3B: Volcano plots of differentially expressed genes in miR-342-3p
+# overexpression and sponge experiments, and ECDF plots comparing
+# logFC distributions of all genes vs. miR-342-3p target genes.
+# - Figure 3C: Dot plot visualizing enriched pathways from gene set enrichment analysis (GSEA).
+# - Figure 3D: Enrichment map visualizing relationships between enriched pathways.
+#
+# Input data files:
+# - Figure 3A & 3B: "edgeR_results.xlsx" (Sheet "miR342-control" and "spongeBT-control")
+# (Output from differential gene expression analysis, likely using edgeR)
+# - Figure 3A & 3B: "TargetScan8.0__miR-342-3p.predicted_targets.xlsx" (Sheet 1)
+# (List of predicted miR-342-3p target genes from TargetScan)
+# - Figure 3C & 3D: "edgeR_results.xlsx" (Sheet "miR342-control")
+# (Same as input for Figure 3A & 3B, used for GSEA)
+# - Figure 3C & 3D: "miDB_sig5.MLS.rds" (RDS file containing gene sets for gene set enrichment analysis)
+#
+################################################################################
+
+

 #### Figure 3A&B ####

 #Import df
-setwd(wdir)
-miR_ctrl <- read_excel("edgeR_results.xlsx", sheet = "miR342-control")
-sponge_ctrl <- read_excel("edgeR_results.xlsx", sheet = "spongeBT-control")
+miR_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "miR342-control")
+sponge_ctrl <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "spongeBT-control")

 #Add DEG color and label for volcano plot
 miR_ctrl$DEG <- "NO"
@@ -60,9 +88,8 @@ ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) +
  xlab(bquote(Log[2](FC))) +
  ylab(bquote(-Log[10](PVal)))

-#Import miR-342-3p target list (from TargetScan)
-setwd(wdir_CB)
-miR342_targets <-  read_excel("TargetScan8.0__miR-342-3p.predicted_targets.xlsx")
+# Import miR-342-3p target list (from TargetScan)
+miR342_targets <-  read_excel(paste0(input_dir, "/TargetScan8.0__miR-342-3p.predicted_targets.xlsx"))
 miR342_targets <-  filter(miR342_targets, miR342_targets$`Cumulative weighted context++ score`< (-0.3))

 #ecdf plot (right panel)
@@ -94,15 +121,13 @@ plot(ecdf(data$logFC), lwd = 2, do.points=F, verticals=T,
  abline(h=0.5, col="black") +
  legend("bottomright", c("All genes","miR-342-3p targets"),
         col = c("black","red"), lwd=2, cex = 0.7)
-ks.test(test$logFC, data$logFC, alternative = "l") #Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* less than average data
+ks.test(test$logFC, data$logFC, alternative = "l") # Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* less than average data

 #### Figure 3C&D ####

-#Upload df
-setwd(wdir)
-df1<- read_excel("EdgeR_results.xlsx",, sheet = "miR342-control")
-setwd(wdir_CB)
-miDB_sig5 <- readRDS("miDB_sig5.MLS.rds") #GO terms db
+# Load df
+df1 <- read_excel(paste0(input_dir, "/edgeR_results.xlsx"), sheet = "miR342-control") # same workbook as Figure 3A&B; the file name must match exactly on case-sensitive filesystems
+miDB_sig5 <- readRDS(paste0(input_dir, "/miDB_sig5.MLS.rds")) #GO terms db

 #Rename pathways of interest in miDB_sig5
 names(miDB_sig5)[names(miDB_sig5) == "HALLMARK_OXIDATIVE_PHOSPHORYLATION"] <- "Oxydative phosphorylation"
@@ -128,18 +153,18 @@ names(miDB_sig5)[names(miDB_sig5) == "HALLMARK_ANGIOGENESIS"] <- "Angiogenesis"
 names(miDB_sig5)[names(miDB_sig5) == "GOMF_PATTERN_RECOGNITION_RECEPTOR_ACTIVITY"] <- "Pattern recognition receptor activity"
 names(miDB_sig5)[names(miDB_sig5) == "PGE2_RO"] <- "PGE2 response genes"

-#Filter low expression, NA and order by FC
+# Filter low expression, NA and order by FC
 df1 <- filter(df1, df1$logCPM>5 & !is.na(df1$...1))
 df1<-df1[order(df1$logFC),]
 df2<-df1$logFC
 names(df2)<-df1$...1

-#Run GSEA with fgsea and filter by PVal
+# Run GSEA with fgsea and filter by PVal
 sig <- miDB_sig5
-test1<-fgsea(sig, df2,minSize  = 7,maxSize  =500, nproc=1)
-PvalResult<-filter(test1, test1$padj<= 0.05)
+test1 <- fgsea(sig, df2, minSize=7, maxSize=500, nproc=1)
+PvalResult <- filter(test1, test1$padj <= 0.05)

-#DotPlot pathways of interest (Fig.3C)
+# DotPlot pathways of interest (Fig.3C)
 pathways <- c("Oxydative phosphorylation",
              "Regulation of cholesterol metabolic process",
              "Response to IL12",
@@ -168,7 +193,7 @@ ggplot(genelist, aes(x=NES, y=reorder(pathway,NES), size=size ,color=padj)) +
  geom_point() +
  scale_size_area(limits=c(10,450), max_size = 15) +
  scale_colour_gradient(low="red",high="blue") +
-  labs(y='Pathway',x='NES')
+  labs(y='Pathway', x='NES')

 #Run GSEA with ClusterProfiler
 genelist <- data.frame(term = rep(names(miDB_sig5), sapply(miDB_sig5, length)),
@@ -179,7 +204,5 @@ test2 <- GSEA(df2,TERM2GENE = genelist)
 #Enrichment map (Fig.3D)
 test3 <- filter(test2, ID %in% pathways)
 test3 <- pairwise_termsim(test3)
-emapplot(test3, min_edge = 0.01, color = "NES", layout="fr", repel =T)+
+emapplot(test3, min_edge = 0.01, color = "NES", layout="fr", repel =T) +
  scale_fill_gradient2(name=bquote(NES),low="blue", high="red") 
\ No newline at end of file
-#N.B. Produces slightly different plot every time, 
-#but connection between pathways stays the same
\ No newline at end of file
--- a/CB2025_figure_5_scRNAseq.R
+++ b/CB2025_figure_5_scRNAseq.R
@@ -29,7 +29,28 @@ dir.create(plot_dir, showWarnings=F, recursive=T)
 sig <- readRDS("/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/reference/miDB_sig5.MLS.rds")


-###############################################################################
+################################################################################
+# R script for single-cell RNA-seq data analysis and figure generation.
+#
+# This script performs cell annotation, visualization, differential gene expression
+# analysis, and gene set enrichment analysis (GSEA) on single-cell RNA-seq data.
+#
+# Figures generated/data produced:
+#   - Figure 5A: UMAP visualization of cell clusters with final annotation
+#   - Figure 5B: UMAP plot with overlayed density of mOrange+ cells
+#   - Figure 5C: Dotplot of Slc7a11 gene expression across cell types and groups
+#   - Figure 5D: Barplot of GSEA results for selected gene sets
+#   - Figure 5E: Heatmap of cytokine signature GSEA results
+#   - Supplementary Figure 5A: Dotplot of marker gene expression across cell types
+#   - Supplementary Figure 5B: CSV tables for cell distribution per cluster and sample
+#   - Supplementary Figure 5C: CSV tables for mOrange+ cell distribution per cluster and sample
+#
+# Input data files:
+#   - RDS object: "CB1_CB3_CB4_final.rds" (Seurat object containing scRNA-seq data)
+#   - RDS object: "miDB_sig5.MLS.rds" (GO terms database for GSEA)
+#
+################################################################################
+

 set.seed(42)


--- a/TCGA_analysis.R
+++ b/TCGA_analysis.R
@@ -7,44 +7,73 @@ library(readxl)
 library(survminer)
 library(gridExtra)
 library(ggplot2)
+
 ##Load dataset
-#username <- "C:/Users/notaro.marco/"
-username <- "/Users/bresesti.chiara/"
-wdir<- paste0(username, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al Cell Reports/TCGA_analysis")
+wdir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts"
 setwd(wdir)
-metaD<-data.frame(fread("TCGA_phenotype_denseDataOnlyDownload.tsv.gz", sep='\t',colClasses=c("character"),data.table=FALSE))
-Surv01<-data.frame(fread("Survival_SupplementalTable_S1_20171025_xena_sp", sep='\t',colClasses=c("character"),data.table=FALSE))
+
+input_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference"
+output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
+
+
+################################################################################
+# R script for TCGA survival analysis of miR-342-3p expression.
+#
+# This script analyzes TCGA pancancer miRNA expression and survival data to assess
+# the prognostic value of hsa-miR-342-3p across different tumor types.
+# It calculates hazard ratios (HR) and generates Kaplan-Meier survival curves
+# to visualize the association between miR-342-3p expression levels and patient survival.
+#
+# Figures generated:
+#   - HR_alltumors_342.pdf (HR forest plot): Forest plot visualizing hazard ratios and
+#     significance of miR-342-3p expression on overall survival across various tumor types.
+#   - Survival_***_342 (Survival curves - multiple tumors): Set of Kaplan-Meier survival plots
+#     for tumor types showing significant hazard ratios, illustrating survival differences
+#     between patients with high and low miR-342-3p expression.
+#   - Survival_metastatic_342.pdf (Survival curve - metastatic tumors): Kaplan-Meier survival plot
+#     specifically for metastatic tumors, showing the impact of miR-342-3p expression on survival
+#     in this patient subgroup.
+#
+# Input data files:
+#   - TCGA_phenotype_denseDataOnlyDownload.tsv.gz: TCGA patient phenotype data (metadata),
+#     downloaded from TCGA or Xena, contains clinical and demographic information.
+#   - Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, provides overall survival (OS) time and status.
+#   - pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz:
+#     TCGA pancancer miRNA expression data (FPKM values), downloaded from Xena,
+#     contains miRNA expression levels across different tumor samples.
+#
+# Note:
+#   - The script filters tumor types based on p-value significance (p < 0.1 for HR plot, p < 0.05 for survival curves)
+#     and minimum patient number (n > 100 for HR plot). These thresholds can be adjusted within the script.
+################################################################################
+
+
+metaD<-data.frame(fread(paste0(input_dir, "/TCGA_phenotype_denseDataOnlyDownload.tsv.gz"), sep='\t', colClasses=c("character"), data.table=FALSE))
+Surv01<-data.frame(fread(paste0(input_dir, "/Survival_SupplementalTable_S1_20171025_xena_sp"), sep='\t', colClasses=c("character"), data.table=FALSE))
 # FPKM01<-fread("tcga_RSEM_Hugo_norm_count")
-FPKM01<-fread("pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz")
+FPKM01<-fread(paste0(input_dir, "/pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz"))
 FPKM01<-as.data.frame(FPKM01)
-# Annot<-data.frame(fread("probeMap_gencode.v23.annotation.gene.probemap", sep='\t',colClasses=c("character"),data.table=FALSE))
+# Annot<-data.frame(fread(paste0(input_dir, "/probeMap_gencode.v23.annotation.gene.probemap"), sep='\t',colClasses=c("character"),data.table=FALSE))
 allGenes <- FPKM01[,1]

-
-
-
 #######################################################################
 #####PLOT HR Score of signature for multiple tumor types###############
 #####################################################################
 humanGenes<-list(c("hsa-miR-342-3p"))

-
 ####create xlsx file with genes, patients and tumor types
 Pan.data3 <- metaD[,]
 Pan.data4 <- merge(Surv01,Pan.data3,by=1)
 Genes_selected <- FPKM01[FPKM01[,1]%in%humanGenes,]

-
 rownames(Genes_selected)<-Genes_selected[,1]
 Genes_selected<-Genes_selected[,-1]
 Genes_selected <- data.frame(t(Genes_selected))

-
 Genes_selected$sample<-rownames(Genes_selected)
 Pan.data5 <- merge(Genes_selected,Pan.data4,by.y="sample")
 colnames(Pan.data5)
-#setwd(wdir)
-#write.xlsx(Pan.data5, "miR342_patients_survival.xlsx")
+write.xlsx(Pan.data5, paste0(output_dir, "/miR342_patients_survival.xlsx")) # selected miRNA expression merged with survival and phenotype tables, one row per sample


 #############Regression with defined k2
@@ -63,7 +92,7 @@ for (i in unique(Pan.data5$cancer.type.abbreviation)){
    df2 <- rbind(df2,df1)
    df2<-df2[(order(df2$p)),]
  }else{}}
-#write.xlsx(df2, "miR342_HR_by_tumortype.xlsx")
+write.xlsx(df2, paste0(output_dir, "/miR342_HR_by_tumortype.xlsx"))


 ###Filters selected tumors
@@ -75,21 +104,13 @@ a<-ggplot(df3, aes(x = reorder(Tumor, HR), y = HR, color = significance, label =
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = pmax(HR-SE, 0), ymax = HR + SE), width = 0.2) +
  scale_color_manual(name = "Color", values = c("red", "blue")) +
-  labs(title = "HR by tumor type",
-       x = "Tumor Type",
-       y = "HR") +
+  labs(title = "HR by tumor type", x = "Tumor Type", y = "HR") +
  geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 1), fill = "lightblue", alpha = 0.01) +
  geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = 1, ymax = Inf), fill = "pink", alpha = 0.01)+
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust= 1, color = "Black")) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "gray")
-
-
-# pdf("HR_alltumors_342.pdf",width=9,height = 4)
-a
-# dev.off()
-
-
+ggsave(filename = paste0(output_dir, "/HR_alltumors_342.pdf"), plot=a, width=9, height=4)


 ####Plot survival of chosen tumors
@@ -98,7 +119,7 @@ df3<-df3[order(df3$HR),]
 df3<-df3[df3$p<0.05,]


-# pdf("Survival_***_342",width=10,height = 6)
+#pdf(paste0(output_dir, "/Survival_***_342"), width=10,height = 6)
 par(mfrow = c(2,4))
 survival<-list()
 for (i in df3$Tumor){
@@ -141,7 +162,7 @@ formatted_p_value <- ifelse(p_value < 0.0001, "<0.0001",  as.numeric(round(p_val
 par(las = 0)


-pdf("Survival_metastatic_342.pdf",width=5,height =5)
+pdf(paste0(output_dir, "/Survival_metastatic_342.pdf"), width=5,height =5)
 plot(fit.score, lty = c(1, 1), col = c("blue", "red"), xlab = "Time (d)", ylab = "Overall Survival", lwd = 2, bty = "n",
     main = "Metastatic tumors") 
 #Add legend