Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
Bresesti Cell Reports 2025
Commits
cad0b7de
Commit
cad0b7de
authored
Mar 25, 2025
by
Marco Monti
Browse files
I updated the scripts
parent
a78a5561
Changes
4
Show whitespace changes
Inline
Side-by-side
CB2025_figure_1_RNAseq.R
View file @
cad0b7de
...
...
@@ -2,35 +2,56 @@ library("readxl")
library
(
"dplyr"
)
#####Directories#####
us
<-
"/Users/Squadrito/"
us
<-
"C:/Users/bresesti.chiara/"
wdir1509
<-
paste0
(
us
,
"/Dropbox (HSR Global)/SquadritoM_1509_RNASeq_QIAseq_UPX/QIAseqUltraplexRNA_181342/primary_analysis"
)
wdir1510
<-
paste0
(
us
,
"/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX"
)
fdir
<-
paste0
(
us
,
"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures"
)
# us <-"C:/Users/bresesti.chiara/"
# wdir1509<-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1509_RNASeq_QIAseq_UPX/QIAseqUltraplexRNA_181342/primary_analysis")
# wdir1510<-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX")
# fdir <- paste0(us, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
#
# input_d <-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1509_RNASeq_QIAseq_UPX/QIAseqUltraplexRNA_181342/primary_analysis")
# wdir1510 <-paste0(us, "/Dropbox (HSR Global)/SquadritoM_1510_RNA_miRNA_QIAseq_UPX")
# output_d <- paste0(us, "/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
input_dir
<-
"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference"
output_dir
<-
"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
################################################################################
# R script to generate data and figures for manuscript figures 1B, 1C, and 1D.
#
# This script reads RNA-seq and miRNA-seq data from Excel files, performs
# normalization and data transformation, and generates output tables and plots.
#
# Figures generated/data produced:
# - Figure 1B: Heatmap data for selected marker genes (Excel table)
# - Figure 1C: Pairwise MA plots for miRNA data
# - Figure 1D: Heatmap data for selected miRNA families (Excel table)
#
# Input data files:
# - Figure 1B: "QIAseqUltraplexRNA_181342.xlsx" (Sheet 3: umis.genes.polyA-mouse)
# - Figure 1C & 1D: "173308.all_samples.summary.xlsx" (Sheet 2: miRNA_piRNA)
# - Figure 1D: "miRNA_Family.xlsx" (Sheet 1)
#
################################################################################
#### Figure 1B ####
setwd
(
wdir1509
)
df1
<-
as.data.frame
(
read_excel
(
"QIAseqUltraplexRNA_181342.xlsx"
,
sheet
=
3
,
col_names
=
T
,
skip
=
1
))
#sheet: umis.genes.polyA-mouse
df1
<-
df1
[,
-
c
(
1
,
3
:
6
)]
#keep only gene name and UMI counts
df1
[,
-1
]
<-
apply
(
df1
[,
-1
],
2
,
function
(
x
){
x
/
sum
(
x
)
*
1000000
})
#UMI normalized by CPM
df1
<-
as.data.frame
(
read_excel
(
paste0
(
input_dir
,
"/QIAseqUltraplexRNA_181342.xlsx"
),
sheet
=
3
,
col_names
=
T
,
skip
=
1
))
#sheet: umis.genes.polyA-mouse
df1
<-
df1
[,
-
c
(
1
,
3
:
6
)]
# keep only gene name and UMI counts
df1
[,
-1
]
<-
apply
(
df1
[,
-1
],
2
,
function
(
x
){
x
/
sum
(
x
)
*
1000000
})
# UMI normalized by CPM
gene_vector
<-
c
(
"Cd19"
,
"Ms4a1"
,
"Fcer2a"
,
"Ighm"
,
"Cd8a"
,
"Xcr1"
,
"Itgae"
,
"Itgax"
,
"Ccr2"
,
"Itgam"
,
"Mgl2"
,
"Cd68"
,
"Vcam1"
,
"Csf1r"
,
"Adgre1"
,
"Siglec1"
,
"Hmox1"
,
"Timd4"
,
"Vsig4"
,
"Clec4f"
,
"Marco"
,
"Pecam1"
,
"Tek"
,
"Lyve1"
,
"Stab2"
)
df2
<-
df1
[
df1
$
gene
%in%
gene_vector
,]
%>%
arrange
(
factor
(
gene
,
levels
=
gene_vector
))
rownames
(
df2
)
<-
df2
[,
1
]
df2.scaled
<-
as.data.frame
(
t
(
scale
(
t
(
df2
[
-1
]))))
#Zscore normalization. Scaled only works on columns, so need to transform
setwd
(
fdir
)
openxlsx
::
write.xlsx
(
df2.scaled
,
"Figure_1B_table.xlsx"
,
rowNames
=
T
)
#the df was exported and used to create a heatmap using Graphpad
df2.scaled
<-
as.data.frame
(
t
(
scale
(
t
(
df2
[
-1
]))))
# Zscore normalization. Scaled only works on columns, so need to transform
openxlsx
::
write.xlsx
(
df2.scaled
,
paste0
(
output_dir
,
"/Figure_1B_table.xlsx"
),
rowNames
=
T
)
# the df was exported and used to create a heatmap using Graphpad
#### Figure 1C ####
# "edgeR_DGE_res_volcano.pdf" (in the 'wdir1509' folder) was imported in illustrator
# and merged with the plot generated by the code below
setwd
(
wdir1510
)
df1
<-
as.data.frame
(
read_excel
(
"173308.all_samples.summary.xlsx"
,
sheet
=
2
,
col_names
=
T
))
#sheet: miRNA_piRNA
df1
<-
df1
[,
1
:
7
]
#UMI
df1
<-
as.data.frame
(
read_excel
(
paste0
(
input_dir
,
"/173308.all_samples.summary.xlsx"
),
sheet
=
2
,
col_names
=
T
))
#sheet: miRNA_piRNA
df1
<-
df1
[,
1
:
7
]
#UMI
rownames
(
df1
)
<-
df1
$
miRNA
df2
<-
apply
(
df1
[,
-1
],
2
,
function
(
x
)
log
(
x
))
#Log counts
df2
<-
as.data.frame
(
limma
::
normalizeCyclicLoess
(
df2
,
weights
=
NULL
,
span
=
0.7
,
iterations
=
5
,
method
=
"pairs"
))
#Cyclic loess normalization
...
...
@@ -41,17 +62,16 @@ upper.panel<-function(x, y, ...){
points
(((
x
+
y
)
/
2
)[
above
],(
x
-
y
)[
above
],
col
=
"red"
,
cex
=
0.6
,
pch
=
19
)
below
<-
(
x
-
y
+1
)
*
((
x
+
y
)
/
2-5
)
<
-5
&
((
x
+
y
)
/
2
)
>
5
points
(((
x
+
y
)
/
2
)[
below
],(
x
-
y
)[
below
],
col
=
"blue"
,
cex
=
0.6
,
pch
=
19
)
}
#function for MA plot
}
#
function for MA plot
pairs
(
df2
[,
1
:
6
],
lower.panel
=
NULL
,
upper.panel
=
upper.panel
,
ylim
=
c
(
-8.5
,
8.5
),
xlim
=
c
(
2
,
14
),
cex.labels
=
2
)
#pairwise plot
ylim
=
c
(
-8.5
,
8.5
),
xlim
=
c
(
2
,
14
),
cex.labels
=
2
)
#
pairwise plot
#### Figure 1D ####
setwd
(
wdir1510
)
df1
<-
as.data.frame
(
read_excel
(
"173308.all_samples.summary.xlsx"
,
sheet
=
2
,
col_names
=
T
))
#sheet: miRNA_piRNA
df1
<-
df1
[
-
which
(
grepl
(
"piR"
,
df1
[,
1
])),
1
:
7
]
#UMI and piRNA removing
df1
[,
1
]
<-
gsub
(
"/.*"
,
""
,
df1
[,
1
])
#leave only first miRNA for ambiguous entries
df1
[,
-1
]
<-
apply
(
df1
[,
-1
],
2
,
function
(
x
){
x
/
sum
(
x
,
na.rm
=
T
)
*
1000000
})
#UMI normalized by CPM
df.families
<-
as.data.frame
(
read_excel
(
"Analysis CB/miRNA Family.xlsx"
,
sheet
=
1
,
col_names
=
T
))
#miRNA families from miRBase
df1
<-
as.data.frame
(
read_excel
(
paste0
(
input_dir
,
"/173308.all_samples.summary.xlsx"
),
sheet
=
2
,
col_names
=
T
))
#sheet: miRNA_piRNA
df1
<-
df1
[
-
which
(
grepl
(
"piR"
,
df1
[,
1
])),
1
:
7
]
# UMI and piRNA removing
df1
[,
1
]
<-
gsub
(
"/.*"
,
""
,
df1
[,
1
])
# leave only first miRNA for ambiguous entries
df1
[,
-1
]
<-
apply
(
df1
[,
-1
],
2
,
function
(
x
){
x
/
sum
(
x
,
na.rm
=
T
)
*
1000000
})
#UMI normalized by CPM
df.families
<-
as.data.frame
(
read_excel
(
"Analysis CB/miRNA_Family.xlsx"
,
sheet
=
1
,
col_names
=
T
))
#miRNA families from miRBase
df.families
<-
df.families
[
df.families
[,
3
]
==
10090
,
c
(
4
,
1
)]
#select mouse entries
df.families
[,
1
]
<-
sub
(
pattern
=
"p.*"
,
replacement
=
"p"
,
x
=
df.families
[,
1
])
df2
<-
merge
(
df.families
,
df1
,
by
=
1
)
...
...
@@ -64,6 +84,5 @@ gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.
df3
<-
subset
(
df2
,
df2
$
`miR family`
%in%
gene_vector
)
%>%
arrange
(
factor
(
`miR family`
,
levels
=
gene_vector
))
rownames
(
df3
)
<-
df3
[,
1
]
df3.scaled
<-
as.data.frame
(
t
(
scale
(
t
(
df3
[
-1
]))))
#Zscore normalization. Scaled only works on columns, so need to transform
setwd
(
fdir
)
openxlsx
::
write.xlsx
(
df2.scaled
,
"Figure_1D_table.xlsx"
,
rowNames
=
T
)
openxlsx
::
write.xlsx
(
df2.scaled
,
paste0
(
output_dir
,
"/Figure_1D_table.xlsx"
),
rowNames
=
T
)
#the df was exported and used to create a heatmap using Graphpad
CB2025_figure_3_RNAseq.R
View file @
cad0b7de
...
...
@@ -7,18 +7,46 @@ library(clusterProfiler)
library
(
enrichplot
)
##### Directories #####
#us <- "/Users/Squadrito/"
us
<-
"C:/Users/bresesti.chiara/"
wdir
<-
paste0
(
us
,
"/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/05-DGE-NoOut-Corr"
)
wdir_CB
<-
paste0
(
us
,
"/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/Analysis CB_v2"
)
fdir
<-
paste0
(
us
,
"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures"
)
# us <-"C:/Users/bresesti.chiara/"
# wdir<-paste0(us,"/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/05-DGE-NoOut-Corr")
# wdir_CB<-paste0(us, "/Dropbox (HSR Global)/90-857433247_RNAseq_Squadrito/Analysis CB_v2")
# fdir <- paste0(us,"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al/Scripts/plots and tables used in figures")
input_dir
<-
"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference"
output_dir
<-
"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
################################################################################
#
# R script to generate data and figures for manuscript figures 3A, 3B, 3C, and 3D.
# This script performs differential gene expression analysis and gene set enrichment
# analysis to generate volcano plots, empirical cumulative distribution function (ECDF) plots,
# dot plots, and enrichment maps.
#
# Figures generated:
# - Figure 3A & 3B: Volcano plots of differentially expressed genes in miR-342-3p
# overexpression and sponge experiments, and ECDF plots comparing
# logFC distributions of all genes vs. miR-342-3p target genes.
# - Figure 3C: Dot plot visualizing enriched pathways from gene set enrichment analysis (GSEA).
# - Figure 3D: Enrichment map visualizing relationships between enriched pathways.
#
# Input data files:
# - Figure 3A & 3B: "edgeR_results.xlsx" (Sheet "miR342-control" and "spongeBT-control")
# (Output from differential gene expression analysis, likely using edgeR)
# - Figure 3A & 3B: "TargetScan8.0__miR-342-3p.predicted_targets.xlsx" (Sheet 1)
# (List of predicted miR-342-3p target genes from TargetScan)
# - Figure 3C & 3D: "EdgeR_results.xlsx" (Sheet "miR342-control")
# (Same as input for Figure 3A & 3B, used for GSEA)
# - Figure 3C & 3D: "miDB_sig5.MLS.rds" (RDS file containing gene sets for gene set enrichment analysis)
#
################################################################################
#### Figure 3A&B ####
#Import df
setwd
(
wdir
)
miR_ctrl
<-
read_excel
(
"edgeR_results.xlsx"
,
sheet
=
"miR342-control"
)
sponge_ctrl
<-
read_excel
(
"edgeR_results.xlsx"
,
sheet
=
"spongeBT-control"
)
miR_ctrl
<-
read_excel
(
paste0
(
input_dir
,
"/edgeR_results.xlsx"
),
sheet
=
"miR342-control"
)
sponge_ctrl
<-
read_excel
(
paste0
(
input_dir
,
"/edgeR_results.xlsx"
),
sheet
=
"spongeBT-control"
)
#Add DEG color and label for volcano plot
miR_ctrl
$
DEG
<-
"NO"
...
...
@@ -60,9 +88,8 @@ ggplot(data=data, aes(x=logFC, y=-log10(PValue), col=DEG, label=DEG_label)) +
xlab
(
bquote
(
Log
[
2
](
FC
)))
+
ylab
(
bquote
(
-
Log
[
10
](
PVal
)))
#Import miR-342-3p target list (from TargetScan)
setwd
(
wdir_CB
)
miR342_targets
<-
read_excel
(
"TargetScan8.0__miR-342-3p.predicted_targets.xlsx"
)
# Import miR-342-3p target list (from TargetScan)
miR342_targets
<-
read_excel
(
paste0
(
input_dir
,
"/TargetScan8.0__miR-342-3p.predicted_targets.xlsx"
))
miR342_targets
<-
filter
(
miR342_targets
,
miR342_targets
$
`Cumulative weighted context++ score`
<
(
-0.3
))
#ecdf plot (right panel)
...
...
@@ -94,15 +121,13 @@ plot(ecdf(data$logFC), lwd = 2, do.points=F, verticals=T,
abline
(
h
=
0.5
,
col
=
"black"
)
+
legend
(
"bottomright"
,
c
(
"All genes"
,
"miR-342-3p targets"
),
col
=
c
(
"black"
,
"red"
),
lwd
=
2
,
cex
=
0.7
)
ks.test
(
test
$
logFC
,
data
$
logFC
,
alternative
=
"l"
)
#Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* less than average data
ks.test
(
test
$
logFC
,
data
$
logFC
,
alternative
=
"l"
)
#
Kolmogorov-Smirnov test to calculate p-val of miR target distribution *not* less than average data
#### Figure 3C&D ####
#Upload df
setwd
(
wdir
)
df1
<-
read_excel
(
"EdgeR_results.xlsx"
,,
sheet
=
"miR342-control"
)
setwd
(
wdir_CB
)
miDB_sig5
<-
readRDS
(
"miDB_sig5.MLS.rds"
)
#GO terms db
# Load df
df1
<-
read_excel
(
paste0
(
input_dir
,
"/EdgeR_results.xlsx"
),
sheet
=
"miR342-control"
)
miDB_sig5
<-
readRDS
(
paste0
(
input_dir
,
"/miDB_sig5.MLS.rds"
))
#GO terms db
#Rename pathways of interest in miDB_sig5
names
(
miDB_sig5
)[
names
(
miDB_sig5
)
==
"HALLMARK_OXIDATIVE_PHOSPHORYLATION"
]
<-
"Oxydative phosphorylation"
...
...
@@ -128,18 +153,18 @@ names(miDB_sig5)[names(miDB_sig5) == "HALLMARK_ANGIOGENESIS"] <- "Angiogenesis"
names
(
miDB_sig5
)[
names
(
miDB_sig5
)
==
"GOMF_PATTERN_RECOGNITION_RECEPTOR_ACTIVITY"
]
<-
"Pattern recognition receptor activity"
names
(
miDB_sig5
)[
names
(
miDB_sig5
)
==
"PGE2_RO"
]
<-
"PGE2 response genes"
#Filter low expression, NA and order by FC
#
Filter low expression, NA and order by FC
df1
<-
filter
(
df1
,
df1
$
logCPM
>
5
&
!
is.na
(
df1
$
...1
))
df1
<-
df1
[
order
(
df1
$
logFC
),]
df2
<-
df1
$
logFC
names
(
df2
)
<-
df1
$
...1
#Run GSEA with fgsea and filter by PVal
#
Run GSEA with fgsea and filter by PVal
sig
<-
miDB_sig5
test1
<-
fgsea
(
sig
,
df2
,
minSize
=
7
,
maxSize
=
500
,
nproc
=
1
)
PvalResult
<-
filter
(
test1
,
test1
$
padj
<=
0.05
)
test1
<-
fgsea
(
sig
,
df2
,
minSize
=
7
,
maxSize
=
500
,
nproc
=
1
)
PvalResult
<-
filter
(
test1
,
test1
$
padj
<=
0.05
)
#DotPlot pathways of interest (Fig.3C)
#
DotPlot pathways of interest (Fig.3C)
pathways
<-
c
(
"Oxydative phosphorylation"
,
"Regulation of cholesterol metabolic process"
,
"Response to IL12"
,
...
...
@@ -168,7 +193,7 @@ ggplot(genelist, aes(x=NES, y=reorder(pathway,NES), size=size ,color=padj)) +
geom_point
()
+
scale_size_area
(
limits
=
c
(
10
,
450
),
max_size
=
15
)
+
scale_colour_gradient
(
low
=
"red"
,
high
=
"blue"
)
+
labs
(
y
=
'Pathway'
,
x
=
'NES'
)
labs
(
y
=
'Pathway'
,
x
=
'NES'
)
#Run GSEA with ClusterProfiler
genelist
<-
data.frame
(
term
=
rep
(
names
(
miDB_sig5
),
sapply
(
miDB_sig5
,
length
)),
...
...
@@ -179,7 +204,5 @@ test2 <- GSEA(df2,TERM2GENE = genelist)
#Enrichment map (Fig.3D)
test3
<-
filter
(
test2
,
ID
%in%
pathways
)
test3
<-
pairwise_termsim
(
test3
)
emapplot
(
test3
,
min_edge
=
0.01
,
color
=
"NES"
,
layout
=
"fr"
,
repel
=
T
)
+
emapplot
(
test3
,
min_edge
=
0.01
,
color
=
"NES"
,
layout
=
"fr"
,
repel
=
T
)
+
scale_fill_gradient2
(
name
=
bquote
(
NES
),
low
=
"blue"
,
high
=
"red"
)
\ No newline at end of file
#N.B. Produces slightly different plot every time,
#but connection between pathways stays the same
\ No newline at end of file
CB2025_figure_5_scRNAseq.R
View file @
cad0b7de
...
...
@@ -29,7 +29,28 @@ dir.create(plot_dir, showWarnings=F, recursive=T)
sig
<-
readRDS
(
"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/reference/miDB_sig5.MLS.rds"
)
###############################################################################
################################################################################
# R script for single-cell RNA-seq data analysis and figure generation.
#
# This script performs cell annotation, visualization, differential gene expression
# analysis, and gene set enrichment analysis (GSEA) on single-cell RNA-seq data.
#
# Figures generated/data produced:
# - Figure 5A: UMAP visualization of cell clusters with final annotation
# - Figure 5B: UMAP plot with overlayed density of mOrange+ cells
# - Figure 5C: Dotplot of Slc7a11 gene expression across cell types and groups
# - Figure 5D: Barplot of GSEA results for selected gene sets
# - Figure 5E: Heatmap of cytokine signature GSEA results
# - Supplementary Figure 5A: Dotplot of marker gene expression across cell types
# - Supplementary Figure 5B: CSV tables for cell distribution per cluster and sample
# - Supplementary Figure 5C: CSV tables for mOrange+ cell distribution per cluster and sample
#
# Input data files:
# - RDS object: "CB1_CB3_CB4_final.rds" (Seurat object containing scRNA-seq data)
# - RDS object: "miDB_sig5.MLS.rds" (GO terms database for GSEA)
#
################################################################################
set.seed
(
42
)
...
...
TCGA_analysis.R
View file @
cad0b7de
...
...
@@ -7,44 +7,73 @@ library(readxl)
library
(
survminer
)
library
(
gridExtra
)
library
(
ggplot2
)
##Load dataset
#username <- "C:/Users/notaro.marco/"
username
<-
"/Users/bresesti.chiara/"
wdir
<-
paste0
(
username
,
"/Dropbox (HSR Global)/CancerGeneTherapy/Cancer Gene Therapy/MS/2024 Bresesti et al Cell Reports/TCGA_analysis"
)
wdir
<-
"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts"
setwd
(
wdir
)
metaD
<-
data.frame
(
fread
(
"TCGA_phenotype_denseDataOnlyDownload.tsv.gz"
,
sep
=
'\t'
,
colClasses
=
c
(
"character"
),
data.table
=
FALSE
))
Surv01
<-
data.frame
(
fread
(
"Survival_SupplementalTable_S1_20171025_xena_sp"
,
sep
=
'\t'
,
colClasses
=
c
(
"character"
),
data.table
=
FALSE
))
input_dir
<-
"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/reference"
output_dir
<-
"/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq_Bresesti/Analysis_MM/GitLab_scripts/Output"
################################################################################
# R script for TCGA survival analysis of miR-342-3p expression.
#
# This script analyzes TCGA pancancer miRNA expression and survival data to assess
# the prognostic value of hsa-miR-342-3p across different tumor types.
# It calculates hazard ratios (HR) and generates Kaplan-Meier survival curves
# to visualize the association between miR-342-3p expression levels and patient survival.
#
# Figures generated:
# - HR_alltumors_342.pdf (HR forest plot): Forest plot visualizing hazard ratios and
# significance of miR-342-3p expression on overall survival across various tumor types.
# - Survival_***_342 (Survival curves - multiple tumors): Set of Kaplan-Meier survival plots
# for tumor types showing significant hazard ratios, illustrating survival differences
# between patients with high and low miR-342-3p expression.
# - Survival_metastatic_342.pdf (Survival curve - metastatic tumors): Kaplan-Meier survival plot
# specifically for metastatic tumors, showing the impact of miR-342-3p expression on survival
# in this patient subgroup.
#
# Input data files:
# - TCGA_phenotype_denseDataOnlyDownload.tsv.gz: TCGA patient phenotype data (metadata),
# downloaded from TCGA or Xena, contains clinical and demographic information.
# - Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, provides overall survival (OS) time and status.
# - pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz:
# TCGA pancancer miRNA expression data (FPKM values), downloaded from Xena,
# contains miRNA expression levels across different tumor samples.
#
# Note:
# - The script filters tumor types based on p-value significance (p < 0.1 for HR plot, p < 0.05 for survival curves)
# and minimum patient number (n > 100 for HR plot). These thresholds can be adjusted within the script.
################################################################################
metaD
<-
data.frame
(
fread
(
paste0
(
input_dir
,
"/TCGA_phenotype_denseDataOnlyDownload.tsv.gz"
),
sep
=
'\t'
,
colClasses
=
c
(
"character"
),
data.table
=
FALSE
))
Surv01
<-
data.frame
(
fread
(
paste0
(
input_dir
,
"/Survival_SupplementalTable_S1_20171025_xena_sp"
),
sep
=
'\t'
,
colClasses
=
c
(
"character"
),
data.table
=
FALSE
))
# FPKM01<-fread("tcga_RSEM_Hugo_norm_count")
FPKM01
<-
fread
(
"
pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz"
)
FPKM01
<-
fread
(
paste0
(
input_dir
,
"/
pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz"
)
)
FPKM01
<-
as.data.frame
(
FPKM01
)
# Annot<-data.frame(fread(
"
probeMap_gencode.v23.annotation.gene.probemap", sep='\t',colClasses=c("character"),data.table=FALSE))
# Annot<-data.frame(fread(
paste0(input_dir, "/
probeMap_gencode.v23.annotation.gene.probemap"
)
, sep='\t',colClasses=c("character"),data.table=FALSE))
allGenes
<-
FPKM01
[,
1
]
#######################################################################
#####PLOT HR Score of signature for multiple tumor types###############
#####################################################################
humanGenes
<-
list
(
c
(
"hsa-miR-342-3p"
))
####create xlsx file with genes, patients and tumor types
Pan.data3
<-
metaD
[,]
Pan.data4
<-
merge
(
Surv01
,
Pan.data3
,
by
=
1
)
Genes_selected
<-
FPKM01
[
FPKM01
[,
1
]
%in%
humanGenes
,]
rownames
(
Genes_selected
)
<-
Genes_selected
[,
1
]
Genes_selected
<-
Genes_selected
[,
-1
]
Genes_selected
<-
data.frame
(
t
(
Genes_selected
))
Genes_selected
$
sample
<-
rownames
(
Genes_selected
)
Pan.data5
<-
merge
(
Genes_selected
,
Pan.data4
,
by.y
=
"sample"
)
colnames
(
Pan.data5
)
#setwd(wdir)
#write.xlsx(Pan.data5, "miR342_patients_survival.xlsx")
write.xlsx
(
Pan.data5
,
paste0
(
ourdir
,
"/miR342_patients_survival.xlsx"
))
#############Regression with defined k2
...
...
@@ -63,7 +92,7 @@ for (i in unique(Pan.data5$cancer.type.abbreviation)){
df2
<-
rbind
(
df2
,
df1
)
df2
<-
df2
[(
order
(
df2
$
p
)),]
}
else
{}}
#
write.xlsx(df2,
"
miR342_HR_by_tumortype.xlsx")
write.xlsx
(
df2
,
paste0
(
output_dir
,
"/
miR342_HR_by_tumortype.xlsx"
)
)
###Filters selected tumors
...
...
@@ -75,21 +104,13 @@ a<-ggplot(df3, aes(x = reorder(Tumor, HR), y = HR, color = significance, label =
geom_point
(
size
=
4
)
+
geom_errorbar
(
aes
(
ymin
=
pmax
(
HR
-
SE
,
0
),
ymax
=
HR
+
SE
),
width
=
0.2
)
+
scale_color_manual
(
name
=
"Color"
,
values
=
c
(
"red"
,
"blue"
))
+
labs
(
title
=
"HR by tumor type"
,
x
=
"Tumor Type"
,
y
=
"HR"
)
+
labs
(
title
=
"HR by tumor type"
,
x
=
"Tumor Type"
,
y
=
"HR"
)
+
geom_rect
(
aes
(
xmin
=
-
Inf
,
xmax
=
Inf
,
ymin
=
-
Inf
,
ymax
=
1
),
fill
=
"lightblue"
,
alpha
=
0.01
)
+
geom_rect
(
aes
(
xmin
=
-
Inf
,
xmax
=
Inf
,
ymin
=
1
,
ymax
=
Inf
),
fill
=
"pink"
,
alpha
=
0.01
)
+
theme_minimal
()
+
theme
(
axis.text.x
=
element_text
(
angle
=
45
,
hjust
=
1
,
color
=
"Black"
))
+
geom_hline
(
yintercept
=
1
,
linetype
=
"dashed"
,
color
=
"gray"
)
# pdf("HR_alltumors_342.pdf",width=9,height = 4)
a
# dev.off()
ggsave
(
filename
=
paste0
(
output_dir
,
"/HR_alltumors_342.pdf"
),
plot
=
a
,
width
=
9
,
height
=
4
)
####Plot survival of chosen tumors
...
...
@@ -98,7 +119,7 @@ df3<-df3[order(df3$HR),]
df3
<-
df3
[
df3
$
p
<
0.05
,]
#
pdf(
"
Survival_***_342"
,
width=10,height = 6)
#pdf(
paste0(output_dir, "/
Survival_***_342"
),
width=10,height = 6)
par
(
mfrow
=
c
(
2
,
4
))
survival
<-
list
()
for
(
i
in
df3
$
Tumor
){
...
...
@@ -141,7 +162,7 @@ formatted_p_value <- ifelse(p_value < 0.0001, "<0.0001", as.numeric(round(p_val
par
(
las
=
0
)
pdf
(
"
Survival_metastatic_342.pdf"
,
width
=
5
,
height
=
5
)
pdf
(
paste0
(
output_dir
,
"/
Survival_metastatic_342.pdf"
),
width
=
5
,
height
=
5
)
plot
(
fit.score
,
lty
=
c
(
1
,
1
),
col
=
c
(
"blue"
,
"red"
),
xlab
=
"Time (d)"
,
ylab
=
"Overall Survival"
,
lwd
=
2
,
bty
=
"n"
,
main
=
"Metastatic tumors"
)
#Add legend
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment