Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
Molteni NatMed2024
Commits
a988d531
"vscode:/vscode.git/clone" did not exist on "cb760cad6e2636fecfeab31eaa72e585b439d259"
Commit
a988d531
authored
Nov 22, 2024
by
Ivan Merelli
Browse files
Upload New File
parent
f4a3560c
Changes
1
Hide whitespace changes
Inline
Side-by-side
fiumara_20241115_UploadScript.R
0 → 100644
View file @
a988d531
### upload on gitlab
# 1) load R packages
library
(
Seurat
)
library
(
GEOquery
)
library
(
limma
)
library
(
dplyr
)
library
(
data.table
)
library
(
clusterProfiler
)
library
(
ggplot2
)
library
(
RColorBrewer
)
library
(
qusage
)
library
(
UCell
)
# 2) define input and output paths
# set path to the working directory
path
<-
""
# 3) load scRNA-seq BM-object
## 3.1) restrict analysis to CD34+ clusters
obj
<-
readRDS
(
file
=
paste0
(
path
,
"BM_20240222.rds"
))
CD34p_clusters
<-
as.character
(
c
(
2
,
8
,
14
,
15
,
16
,
19
,
20
,
26
,
27
,
30
,
33
))
obj
$
seurat
<-
obj
$
RNA_snn_h.orig.ident_res.1.8
obj
<-
SetIdent
(
object
=
obj
,
value
=
"seurat"
)
obj
<-
subset
(
obj
,
subset
=
(
seurat
%in%
CD34p_clusters
))
saveRDS
(
object
=
obj
,
file
=
paste0
(
path
,
"BM_CD34p.rds"
))
## 3.2) free memory space
l
<-
ls
()
l
<-
l
[
!
l
%in%
"path"
]
rm
(
list
=
l
);
gc
()
# 4) load published scRNA-seq dataset
## 4.1) Ainciburu et al.
### 4.1.1) download tar file and metadata
destfolder
<-
paste0
(
path
,
"GSE180298/"
);
dir.create
(
path
=
destfolder
,
showWarnings
=
F
,
recursive
=
T
)
destfile
<-
paste0
(
destfolder
,
"GSE180298_RAW.tar"
)
download.file
(
url
=
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file"
,
destfile
=
destfile
)
metadata_files
<-
c
(
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file&file=GSE180298%5Felderly%5Fmetadata%2Etxt%2Egz"
,
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file&file=GSE180298%5Fmds%5Fmetadata%2Etxt%2Egz"
,
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE180298&format=file&file=GSE180298%5Fyoung%5Fmetadata%2Etxt%2Egz"
)
download.file
(
url
=
metadata_files
,
destfile
=
paste0
(
destfolder
,
"GSE180298_"
,
c
(
"elderly"
,
"mds"
,
"young"
),
"_metadata.txt.gz"
))
### 4.1.2) untar file
untar
(
tarfile
=
destfile
,
exdir
=
paste0
(
destfolder
,
"GSE180298_RAW/"
))
files
<-
list.files
(
path
=
paste0
(
destfolder
,
"GSE180298_RAW/"
),
full.names
=
T
)
md
<-
list.files
(
path
=
destfolder
,
full.names
=
T
)
md_files
<-
md
[
grepl
(
md
,
pattern
=
"metadata"
)]
### 4.1.3) define seurat object
metadata
<-
c
()
for
(
i
in
seq_len
(
length
(
md_files
))){
gunzip
(
md_files
[
i
],
remove
=
FALSE
,
overwrite
=
TRUE
)
unzfile
<-
gsub
(
md_files
[
i
],
pattern
=
".gz"
,
replacement
=
""
)
temp
<-
read.delim
(
file
=
unzfile
,
header
=
T
,
sep
=
"\t"
)
metadata
<-
rbind
(
metadata
,
temp
)
}
metadata
<-
as.data.frame
(
metadata
)
metadata
$
orig.ident
<-
strsplit2
(
rownames
(
metadata
),
split
=
"\\_"
)[,
2
]
counts
<-
cell_counts
<-
samples
<-
c
()
for
(
i
in
seq_len
(
length
(
files
))){
split_filename
<-
strsplit2
(
files
[
i
],
split
=
"/"
)
sampleid
<-
strsplit2
(
split_filename
[,
ncol
(
split_filename
)],
split
=
"\\_"
)[,
2
]
print
(
paste0
(
"Processing file "
,
sampleid
,
" ("
,
i
,
" over "
,
length
(
files
),
")"
))
# load h5 object
temp
<-
Read10X_h5
(
filename
=
files
[
i
],
use.names
=
TRUE
,
unique.features
=
TRUE
)
coln
<-
paste0
(
strsplit2
(
colnames
(
temp
),
split
=
"-"
)[,
1
],
"_"
,
sampleid
)
colnames
(
temp
)
<-
coln
# store
temp_object
<-
CreateSeuratObject
(
counts
=
temp
)
counts_temp
<-
matrix
(
data
=
temp_object
@
assays
$
RNA
@
counts
,
ncol
=
ncol
(
temp_object
))
rownames
(
counts_temp
)
<-
rownames
(
temp_object
)
colnames
(
counts_temp
)
<-
colnames
(
temp_object
)
# store info
cell_counts
<-
c
(
cell_counts
,
ncol
(
counts_temp
))
samples
<-
c
(
samples
,
sampleid
)
if
(
is.null
(
nrow
(
counts
))){
counts
<-
counts_temp
gs
<-
rownames
(
counts
)
}
else
{
gs
<-
intersect
(
gs
,
rownames
(
counts_temp
))
counts
<-
cbind
(
counts
[
match
(
gs
,
rownames
(
counts
)),
],
counts_temp
[
match
(
gs
,
rownames
(
counts_temp
)),
])
}
}
names
(
cell_counts
)
<-
samples
md
<-
metadata
[
rownames
(
metadata
)
%in%
colnames
(
counts
),];
dim
(
md
)
m
<-
match
(
rownames
(
md
),
colnames
(
counts
));
table
(
is.na
(
m
))
counts
<-
counts
[,
m
]
GSE180298
<-
CreateSeuratObject
(
counts
=
counts
,
meta.data
=
metadata
)
### 4.1.4) select only elderly donors
GSE180298
<-
SetIdent
(
object
=
GSE180298
,
value
=
"orig.ident"
)
elderly_sampleid
<-
unique
(
GSE180298
$
orig.ident
)[
grepl
(
unique
(
GSE180298
$
orig.ident
),
pattern
=
"elderly"
)]
GSE180298
<-
subset
(
GSE180298
,
subset
=
(
orig.ident
%in%
elderly_sampleid
))
saveRDS
(
object
=
GSE180298
,
file
=
paste0
(
path
,
"GSE180298_elderly.rds"
));
gc
()
## 4.1.5) free memory space
l
<-
ls
()
l
<-
l
[
!
l
%in%
"path"
]
rm
(
list
=
l
);
gc
()
## 4.2) Wu et al.
### 4.2.1) download tar file
destfolder
<-
paste0
(
path
,
"GSE196052/"
);
dir.create
(
path
=
destfolder
,
showWarnings
=
F
,
recursive
=
T
)
destfile
<-
paste0
(
destfolder
,
"GSE196052_RAW.tar"
)
download.file
(
url
=
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE196052&format=file"
,
destfile
=
destfile
)
metadata_files
<-
c
(
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE196052&format=file&file=GSE196052%5FdataCount%5FCD34%2Ecsv%2Egz"
)
download.file
(
url
=
metadata_files
,
destfile
=
paste0
(
destfolder
,
"GSE196052_dataCount_CD34.csv.gz"
))
### 4.2.2) untar file
untar
(
tarfile
=
destfile
,
exdir
=
paste0
(
destfolder
,
"GSE196052_RAW/"
))
files_annotation
<-
list.files
(
path
=
paste0
(
destfolder
,
"GSE196052_RAW/"
),
full.names
=
T
)
files
<-
list.files
(
path
=
destfolder
,
full.names
=
T
)
### 4.2.3) define seurat object
md
<-
read.delim
(
file
=
files
[
grepl
(
files
,
pattern
=
"SraRun"
)],
header
=
T
,
sep
=
","
)
table
(
md
$
subject_id.status
,
md
$
tissue.cell_type
)
mdfiles
<-
files_annotation
[
grepl
(
files_annotation
,
pattern
=
"GSM"
)]
metadata
<-
c
()
for
(
i
in
seq_len
(
length
(
mdfiles
))){
x
<-
mdfiles
[
i
]
temp
<-
read.delim
(
gzfile
(
x
),
header
=
T
,
sep
=
","
)
metadata
<-
rbind
(
metadata
,
temp
)
}
f
<-
files
[
grepl
(
files
,
pattern
=
"CD34.csv.gz"
)]
counts_cd34
<-
fread
(
f
)
%>%
as.data.frame
()
rn
<-
as.character
(
counts_cd34
[,
1
])
counts_cd34
<-
counts_cd34
[,
-1
]
rownames
(
counts_cd34
)
<-
rn
cd34_cells
<-
colnames
(
counts_cd34
)
cd34_cells
<-
gsub
(
x
=
cd34_cells
,
pattern
=
"CD34_"
,
replacement
=
""
)
wu_annot
<-
read.csv
(
file
=
paste0
(
destfolder
,
"CD34_metaDatatSNECellType_ALiceManual.csv"
),
header
=
T
)
m
<-
match
(
cd34_cells
,
wu_annot
$
orig.ident
);
table
(
is.na
(
m
))
status
<-
wu_annot
$
group
[
m
]
sampleid
<-
wu_annot
$
subject
[
m
]
mdt
<-
data.frame
(
sampleid
,
status
)
rownames
(
mdt
)
<-
colnames
(
counts_cd34
)
GSE196052
<-
CreateSeuratObject
(
counts
=
counts_cd34
,
meta.data
=
mdt
)
GSE196052
<-
SetIdent
(
object
=
GSE196052
,
value
=
"orig.ident"
)
saveRDS
(
object
=
GSE196052
,
file
=
paste0
(
path
,
"GSE196052.rds"
))
## 4.2.4) free memory space
l
<-
ls
()
l
<-
l
[
!
l
%in%
"path"
]
rm
(
list
=
l
);
gc
()
# 5) Define single seurat object
## 5.1) load datasets
fiumara
<-
readRDS
(
paste0
(
path
,
"BM_CD34p.rds"
))
GSE180298
<-
readRDS
(
paste0
(
path
,
"GSE180298_elderly.rds"
))
GSE196052
<-
readRDS
(
paste0
(
path
,
"GSE196052.rds"
))
## 5.2) define common features
genes
<-
table
(
c
(
rownames
(
fiumara
),
rownames
(
GSE180298
),
rownames
(
GSE196052
)))
common_genes
<-
names
(
genes
)[
genes
==
3
];
length
(
common_genes
)
## 5.3) add sampleinfo
fiumara
$
source
<-
"fiumara"
fiumara
$
sample_info
<-
paste0
(
"fiumara_"
,
fiumara
$
orig.ident
)
GSE180298
$
source
<-
"GSE180298"
GSE180298
$
sample_info
<-
paste0
(
"GSE180298_"
,
GSE180298
$
orig.ident
)
GSE196052
$
source
<-
"GSE196052"
GSE196052
$
sample_info
<-
paste0
(
"GSE196052_"
,
GSE196052
$
sampleid
)
## 5.4) extract counts relative to these genes and define Seurat object
counts
<-
cbind
(
fiumara
@
assays
$
RNA
@
counts
[
common_genes
,],
GSE180298
@
assays
$
RNA
@
counts
[
common_genes
,],
GSE196052
@
assays
$
RNA
@
counts
[
common_genes
,])
vars
<-
c
(
"source"
,
"sample_info"
,
"nCount_RNA"
,
"nFeature_RNA"
)
md
<-
rbind
(
fiumara
@
meta.data
[,
vars
],
GSE180298
@
meta.data
[,
vars
],
GSE196052
@
meta.data
[,
vars
])
obj
<-
CreateSeuratObject
(
counts
=
counts
,
meta.data
=
md
)
obj
$
percent.mt
<-
(
colSums
(
obj
@
assays
$
RNA
@
counts
[
grepl
(
rownames
(
obj
),
pattern
=
"^MT-"
),])
/
colSums
(
obj
@
assays
$
RNA
@
counts
))
*
100
obj
$
percent.rb
<-
(
colSums
(
obj
@
assays
$
RNA
@
counts
[
grepl
(
rownames
(
obj
),
pattern
=
"^RPL"
),])
/
colSums
(
obj
@
assays
$
RNA
@
counts
))
*
100
saveRDS
(
object
=
obj
,
file
=
paste0
(
path
,
"combined.rds"
));
gc
()
## 5.5) free memory space
l
<-
ls
()
l
<-
l
[
!
l
%in%
c
(
"path"
,
"obj"
)]
rm
(
list
=
l
);
gc
()
# 6) removing low quality cells, normalization, scaling, integration and clustering
## 6.1) cell filtering
obj
$
keep
<-
(
obj
$
nFeature_RNA
>
200
)
&
(
obj
$
percent.mt
<
25
)
table
(
obj
$
keep
,
obj
$
source
)
obj
<-
subset
(
obj
,
subset
=
(
keep
%in%
TRUE
))
saveRDS
(
obj
,
file
=
paste0
(
path
,
"combined_filtered.rds"
))
## 6.2) normalization
varfeatures
<-
1000
obj
<-
NormalizeData
(
object
=
obj
)
obj
<-
FindVariableFeatures
(
obj
,
selection.method
=
"vst"
,
nfeatures
=
varfeatures
,
verbose
=
T
)
## 6.3) scaling
reg_vars
=
c
(
"percent.mt"
,
"nCount_RNA"
)
obj
<-
ScaleData
(
object
=
obj
,
vars.to.regress
=
reg_vars
,
display.progress
=
T
,
features
=
rownames
(
obj
))
saveRDS
(
obj
,
file
=
paste0
(
path
,
"combined_normscaled.rds"
))
## 6.4) dimensionality reduction
max_pca
<-
100
obj
<-
RunPCA
(
object
=
obj
,
features
=
VariableFeatures
(
object
=
obj
),
npcs
=
max_pca
,
reduction.name
=
"pca"
,
reduction.key
=
"PC_"
)
explvar
<-
((
obj
@
reductions
$
pca
@
stdev
^
2
)
/
sum
((
obj
@
reductions
$
pca
@
stdev
^
2
)))
*
100
delta
<-
explvar
-
c
(
explvar
[
-1
],
0
)
opt_delta
<-
length
(
delta
[
delta
>
1e-2
])
opt_explvar
<-
min
(
which
(
cumsum
(
explvar
)
>
80
))
opt
<-
min
(
opt_delta
,
opt_explvar
)
obj
<-
RunUMAP
(
object
=
obj
,
seed.use
=
123
,
reduction
=
"pca"
,
dims
=
1
:
opt
)
## 6.5) harmony dataset integration
integration.var
<-
c
(
"source"
,
"sample_info"
)
obj
<-
RunHarmony
(
object
=
obj
,
group.by.vars
=
integration.var
,
max.iter.harmony
=
30
,
plot_convergence
=
FALSE
,
reduction.save
=
"harmony"
)
obj
<-
RunUMAP
(
object
=
obj
,
seed.use
=
123
,
dims
=
1
:
opt
,
reduction
=
"harmony"
,
reduction.name
=
"harmony_umap"
,
reduction.key
=
"UMAPh_"
,
return.model
=
TRUE
)
## 6.6) find neighboring cells
obj
<-
FindNeighbors
(
object
=
obj
,
dims
=
1
:
opt
,
force.recalc
=
T
,
reduction
=
"harmony"
,
graph.name
=
c
(
"RNA_nn_h.iv"
,
"RNA_snn_h.iv"
))
## 6.7) cell clustering
clu_res
<-
seq
(
0.1
,
1
,
by
=
0.1
)
for
(
res
in
clu_res
){
obj
<-
FindClusters
(
object
=
obj
,
algorithm
=
1
,
resolution
=
as.numeric
(
res
),
graph.name
=
"RNA_snn_h.iv"
)
}
## 6.8) assign celltype to each cluster
obj
$
seurat
<-
obj
$
RNA_snn_h.iv_res.0.5
obj
$
seurat_annotation
<-
NA
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"0"
]
<-
"HSC/MPP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
c
(
"1"
,
"6"
,
"11"
)]
<-
"Mature Ery"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"7"
]
<-
"VEXAS Ery/CMP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
c
(
"8"
,
"22"
)]
<-
"Immature Ery"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"2"
]
<-
"MPP/CMP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"12"
]
<-
"PreBNK"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"3"
]
<-
"CMP/GMP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"4"
]
<-
"GP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"13"
]
<-
"MDP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"5"
]
<-
"MyeloLympho/CMP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
c
(
"9"
,
"21"
)]
<-
"VEXAS Immature Ery"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"10"
]
<-
"Undefined"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"14"
]
<-
"BEM"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"16"
]
<-
"Monocyte Progenitors"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
c
(
"15"
,
"19"
)]
<-
"MLP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"17"
]
<-
"MEP"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"18"
]
<-
"VEXAS Mature Ery"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"20"
]
<-
"VEXAS MPP-Ery"
obj
$
seurat_annotation
[
obj
$
seurat
%in%
"23"
]
<-
"VEXAS Myelo/CMP"
annotated_cols
<-
c
(
"HSC/MPP"
=
'#FB0207'
,
"MyeloLympho/CMP"
=
'#c6dbef'
,
"MPP/CMP"
=
"#7fcdbb"
,
"CMP/GMP"
=
'#9ecae1'
,
"Monocyte Progenitors"
=
'#66CCFF'
,
"MDP"
=
'#0F80FF'
,
"GP"
=
"#08519c"
,
"PreBNK"
=
'#118040'
,
"MLP"
=
"#FECC66"
,
"BEM"
=
"#bdbdbd"
,
"MEP"
=
'#f768a1'
,
"Immature Ery"
=
'#B17DFC'
,
"Mature Ery"
=
"#800080"
,
"VEXAS MPP-Ery"
=
'#fde0dd'
,
"VEXAS Ery/CMP"
=
'#fcc5c0'
,
"VEXAS Immature Ery"
=
'#fa9fb5'
,
"VEXAS Mature Ery"
=
"#dd3497"
,
"VEXAS Myelo/CMP"
=
"#ccebc5"
,
"Undefined"
=
'#d9d9d9'
)
levs
<-
names
(
annotated_cols
)
obj
$
seurat_annotation
<-
factor
(
obj
$
seurat_annotation
,
levels
=
levs
)
## 6.9) define status and vexas-mutation variables
obj
$
cell_barcode
<-
strsplit2
(
rownames
(
obj
@
meta.data
),
split
=
"\\_"
)[,
2
]
GSE196052_annot
<-
read.csv
(
file
=
paste0
(
path
,
"GSE196052/CD34_metaDatatSNECellType_ALiceManual.csv"
),
header
=
T
)
GSE196052_cases
<-
GSE196052_annot
[
GSE196052_annot
$
group
%in%
"PT"
,]
### 6.9.1) status
obj
$
status
<-
"HD"
obj
$
status
[(
obj
$
source
%in%
"GSE196052"
)
&
(
obj
$
cell_barcode
%in%
GSE196052_cases
$
orig.ident
)]
<-
"PT"
obj
$
status
[(
obj
$
source
%in%
"fiumara"
)]
<-
"PT"
table
(
obj
$
source
,
obj
$
status
)
### 6.9.2) VEXAS mutation
GSE196052_pt2upn
<-
paste0
(
"GSE196052_PT"
,
1
:
9
)
names
(
GSE196052_pt2upn
)
<-
paste0
(
"GSE196052_UPN"
,
c
(
6
,
11
,
1
,
10
,
13
,
14
,
15
,
16
,
17
))
obj
$
sample_info_upn
<-
obj
$
sample_info
for
(
i
in
seq_len
(
length
(
GSE196052_pt2upn
))){
obj
$
sample_info_upn
[
obj
$
sample_info_upn
%in%
GSE196052_pt2upn
[
i
]]
<-
names
(
GSE196052_pt2upn
)[
i
]
}
table
(
obj
$
source
,
obj
$
sample_info_upn
)
table
(
obj
$
sample_info
,
obj
$
sample_info_upn
)
obj
$
vexas_mutation
<-
"HD"
obj
$
vexas_mutation
[
obj
$
sample_info_upn
%in%
c
(
paste0
(
"fiumara_BM-0"
,
1
),
paste0
(
"GSE196052_UPN"
,
c
(
1
,
10
,
11
,
13
,
16
,
17
)))]
<-
"THR"
obj
$
vexas_mutation
[
obj
$
sample_info_upn
%in%
c
(
paste0
(
"fiumara_BM-0"
,
c
(
2
,
3
,
8
)),
paste0
(
"GSE196052_UPN"
,
c
(
6
)))]
<-
"VAL"
obj
$
vexas_mutation
[
obj
$
sample_info_upn
%in%
c
(
paste0
(
"fiumara_BM-0"
,
c
(
4
,
9
)),
paste0
(
"GSE196052_UPN"
,
c
(
14
,
15
)))]
<-
"LEU"
saveRDS
(
obj
,
file
=
paste0
(
path
,
"combined_annotated.rds"
))
table
(
obj
$
status
,
obj
$
vexas_mutation
)
table
(
obj
$
source
,
obj
$
vexas_mutation
)
table
(
obj
$
vexas_mutation
,
obj
$
sample_info_upn
)
## 6.10) free memory space
l
<-
ls
()
l
<-
l
[
!
l
%in%
c
(
"path"
,
"obj"
)]
rm
(
list
=
l
);
gc
()
# 7) Celltype-wise VEXAS vs HD (DE and GSEA)
outpath
<-
paste0
(
path
,
"DE_GSEA/"
);
dir.create
(
path
=
outpath
,
showWarnings
=
F
,
recursive
=
T
)
## 7.1) define function to run clusterProfiler GSEA
gsea_run
<-
function
(
marks
,
gmt
){
# load gmt file
gmt.obj
<-
clusterProfiler
::
read.gmt
(
gmt
)
# order DE results by logFC
genes
<-
marks
$
avg_log2FC
names
(
genes
)
<-
marks
$
gene_name
genes
<-
genes
[
order
(
genes
,
decreasing
=
T
)]
genes
<-
genes
[
!
duplicated
(
names
(
genes
))]
# run GSEA
gsea
<-
GSEA
(
geneList
=
genes
,
TERM2GENE
=
gmt.obj
,
nPerm
=
10000
,
pvalueCutoff
=
1
)
return
(
gsea
)
}
## 7.2) download hallmarks gene sets
hallmarks_gsea
<-
c
(
"https://data.broadinstitute.org/gsea-msigdb/msigdb/release/7.4/h.all.v7.4.symbols.gmt"
)
download.file
(
url
=
hallmarks_gsea
,
destfile
=
paste0
(
outpath
,
"h.all.v7.4.symbols.gmt"
))
## 7.3) VEXAS vs HD
### 7.3.1) DE analysis
annclusters
<-
names
(
annotated_cols
)
mincells
<-
10
for
(
i
in
seq_len
(
length
(
annclusters
))){
cl
<-
annclusters
[
i
]
cl_id
<-
gsub
(
x
=
cl
,
pattern
=
"/"
,
replacement
=
"-"
)
temp
<-
subset
(
obj
,
subset
=
(
seurat_annotation
%in%
cl
))
temp
<-
SetIdent
(
object
=
temp
,
value
=
"status"
)
if
(
all
(
table
(
temp
$
status
)
>=
mincells
)){
de
<-
FindMarkers
(
temp
,
ident.1
=
"PT"
,
ident.2
=
"HD"
,
test.use
=
"wilcox"
,
min.pct
=
0.1
,
logfc.threshold
=
0
)
marks
<-
de
[
order
(
de
$
p_val_adj
,
decreasing
=
F
),]
marks
$
gene_name
<-
rownames
(
marks
)
write.table
(
x
=
marks
,
file
=
paste0
(
outpath
,
"de_"
,
cl_id
,
".txt"
),
sep
=
'\t'
,
row.names
=
F
)
}
}
### 7.3.2) GSEA Hallmarks
for
(
i
in
seq_len
(
length
(
annclusters
))){
cl
<-
annclusters
[
i
]
cl_id
<-
gsub
(
x
=
cl
,
pattern
=
"/"
,
replacement
=
"-"
)
defile
<-
paste0
(
outpath
,
'de_'
,
cl_id
,
".txt"
)
if
(
file.exists
(
defile
)){
marks
<-
read.table
(
file
=
defile
,
sep
=
"\t"
,
header
=
T
)
gsea
<-
gsea_run
(
marks
,
gmt
=
hallmarks_gsea
)
write.table
(
x
=
gsea
,
file
=
paste0
(
outpath
,
'gsea_'
,
cl_id
,
".txt"
),
sep
=
'\t'
,
row.names
=
F
)
}
}
## 7.4) VEXAS_MUT vs HD
### 7.4.1) DE analysis
annclusters
<-
names
(
annotated_cols
)
mincells
<-
10
mut
<-
c
(
"LEU"
,
"THR"
,
"VAL"
)
for
(
m
in
mut
){
levs
<-
c
(
m
,
"HD"
)
sub
<-
subset
(
obj
,
subset
=
(
vexas_mutation
%in%
levs
))
for
(
i
in
seq_len
(
length
(
annclusters
))){
cl
<-
annclusters
[
i
]
cl_id
<-
gsub
(
x
=
cl
,
pattern
=
"/"
,
replacement
=
"-"
)
temp
<-
subset
(
sub
,
subset
=
(
seurat_annotation
%in%
cl
))
temp
<-
SetIdent
(
object
=
temp
,
value
=
"vexas_mutation"
)
if
(
all
(
table
(
temp
$
vexas_mutation
)
>=
mincells
)){
de
<-
FindMarkers
(
temp
,
ident.1
=
m
,
ident.2
=
"HD"
,
test.use
=
"wilcox"
,
min.pct
=
0.1
,
logfc.threshold
=
0
)
marks
<-
de
[
order
(
de
$
p_val_adj
,
decreasing
=
F
),]
marks
$
gene_name
<-
rownames
(
marks
)
write.table
(
x
=
marks
,
file
=
paste0
(
outpath
,
"de_"
,
cl_id
,
"_"
,
m
,
"vHD.txt"
),
sep
=
'\t'
,
row.names
=
F
)
}
}
}
### 7.4.2) GSEA Hallmarks
for
(
m
in
mut
){
for
(
i
in
seq_len
(
length
(
annclusters
))){
cl
<-
annclusters
[
i
]
cl_id
<-
gsub
(
x
=
cl
,
pattern
=
"/"
,
replacement
=
"-"
)
defile
<-
paste0
(
outpath
,
"de_"
,
cl_id
,
"_"
,
m
,
"vHD.txt"
)
if
(
file.exists
(
defile
)){
marks
<-
read.table
(
file
=
defile
,
sep
=
"\t"
,
header
=
T
)
gsea
<-
gsea_run
(
marks
,
gmt
=
hallmarks_gsea
)
write.table
(
x
=
gsea
,
file
=
paste0
(
outpath
,
'gsea_'
,
cl_id
,
"_"
,
m
,
"vHD.txt"
),
sep
=
'\t'
,
row.names
=
F
)
}
}
}
## 7.5) free memory space
l
<-
ls
()
l
<-
l
[
!
l
%in%
c
(
"path"
,
"obj"
)]
rm
(
list
=
l
);
gc
()
# 8) UCell module scores and wilcoxon test
## 8.1) load marker gene set
vexas_50
<-
qusage
::
read.gmt
(
file
=
paste0
(
path
,
"xenograft_signatures/custom_vexas_50.gmt"
))
vexas_signature
<-
vexas_50
[[
1
]]
## 8.2) compute UCell module scores
names
(
vexas_signature
)
<-
"VEXAS_Xenograft_sig50"
ncol
<-
ncol
(
obj
@
meta.data
)
obj
<-
AddModuleScore_UCell
(
obj
,
features
=
vexas_signature
)
colnames
(
obj
@
meta.data
)
<-
c
(
colnames
(
obj
@
meta.data
)[
seq_len
(
ncol
)],
names
(
vexas_signature
))
## 8.3) Celltype-wise wilcoxon test: VEXAS vs HD
x
<-
melt
(
data
=
obj
@
meta.data
,
id.vars
=
c
(
"status"
,
"seurat_annotation"
),
measure.vars
=
c
(
"VEXAS_Xenograft_sig50"
))
w
<-
x
%>%
dplyr
::
group_by
(
variable
,
seurat_annotation
)
%>%
dplyr
::
summarise
(
pvalue
=
wilcox.test
(
x
=
value
[
status
==
"PT"
],
y
=
value
[
status
==
"HD"
])
$
p.value
)
w
$
p.adjust
<-
p.adjust
(
p
=
w
$
pvalue
)
w
<-
w
[
order
(
w
$
p.adjust
,
decreasing
=
F
),]
write.table
(
x
=
w
,
file
=
paste0
(
path
,
"xenograft_signatures/wilcoxon.txt"
),
sep
=
"\t"
,
row.names
=
F
,
col.names
=
T
,
quote
=
F
)
# 9) figures
figpath
<-
paste0
(
path
,
"figures/"
);
dir.create
(
path
=
figpath
,
showWarnings
=
F
,
recursive
=
T
)
## 9.1) figure 5d: Annotated UMAP
obj
<-
SetIdent
(
object
=
obj
,
value
=
"seurat_annotation"
)
g
<-
DimPlot
(
obj
,
reduction
=
"harmony_umap"
,
label.box
=
T
,
label
=
T
,
label.color
=
T
,
label.size
=
2
)
+
scale_color_manual
(
values
=
annotated_cols
,
limits
=
levs
)
+
scale_fill_manual
(
values
=
annotated_cols
,
limits
=
levs
)
+
ggtitle
(
"Cluster Annotation"
)
+
theme
(
plot.title
=
element_text
(
hjust
=
0.5
),
legend.position
=
"right"
,
legend.text
=
element_text
(
size
=
7
))
ggsave
(
g
,
filename
=
paste0
(
figpath
,
"Fig5D_UMAP_AnnotatedClusters.png"
),
width
=
10
,
height
=
7
,
limitsize
=
FALSE
)
## 9.2) figure 5e/5f: GSEA Hallmarks
### 9.2.1) load results
res_VEXASvHD
<-
c
()
for
(
i
in
seq_len
(
length
(
annclusters
))){
cl
<-
annclusters
[
i
]
cl_id
<-
gsub
(
x
=
cl
,
pattern
=
"/"
,
replacement
=
"-"
)
file
<-
paste0
(
outpath
,
'gsea_'
,
cl_id
,
".txt"
)
if
(
file.exists
(
file
)){
x
<-
read.delim
(
file
=
file
,
header
=
T
)
x
<-
x
[
order
(
x
$
p.adjust
,
-
x
$
NES
),]
res_VEXASvHD
<-
rbind
(
res_VEXASvHD
,
data.frame
(
x
,
celltype
=
cl
,
test
=
"VEXASvOLD"
))
}
}
res_MUTvHD
<-
c
()
for
(
m
in
mut
){
for
(
i
in
seq_len
(
length
(
annclusters
))){
cl
<-
annclusters
[
i
]
cl_id
<-
gsub
(
x
=
cl
,
pattern
=
"/"
,
replacement
=
"-"
)
file
<-
paste0
(
outpath
,
'gsea_'
,
cl_id
,
"_"
,
m
,
"vHD.txt"
)
if
(
file.exists
(
file
)){
id
<-
paste0
(
m
,
"vHD"
)
x
<-
read.delim
(
file
=
file
,
header
=
T
)
x
<-
x
[
order
(
x
$
p.adjust
,
-
x
$
NES
),]
res_MUTvHD
<-
rbind
(
res_MUTvHD
,
data.frame
(
x
,
celltype
=
cl
,
test
=
id
))
}
}
}
res
<-
rbind
(
res_VEXASvHD
,
res_MUTvHD
)
res
$
significance_asterisk
<-
""
res
$
significance_asterisk
[
res
$
p.adjust
<
0.05
]
<-
"*"
res
$
significance_asterisk
[
res
$
p.adjust
<
0.01
]
<-
"**"
res
$
significance_asterisk
[
res
$
p.adjust
<
0.001
]
<-
"***"
res
<-
res
%>%
tidyr
::
complete
(
ID
,
celltype
,
test
)
%>%
as.data.frame
()
res
$
ID
<-
gsub
(
x
=
res
$
ID
,
pattern
=
"HALLMARK_"
,
replacement
=
""
)
### 9.2.2) plot
g
<-
res
%>%
ggplot
()
+
theme_bw
()
+
facet_grid
(
.
~
test
)
+
geom_tile
(
aes
(
x
=
celltype
,
y
=
ID
,
fill
=
NES
))
+
geom_text
(
aes
(
x
=
celltype
,
y
=
ID
,
label
=
significance_asterisk
),
size
=
2
)
+
theme
(
plot.title
=
element_text
(
hjust
=
0.5
,
size
=
10
),
axis.text.x
=
element_text
(
angle
=
45
,
,
vjust
=
1
,
hjust
=
1
),
legend.position
=
"top"
,
strip.background
=
element_rect
(
fill
=
"white"
))
+
ylab
(
""
)
+
xlab
(
""
)
+
scale_fill_gradientn
(
colours
=
colorRampPalette
(
rev
(
brewer.pal
(
11
,
"RdBu"
)))(
100
),
limits
=
c
(
-4
,
4
),
na.value
=
"grey"
)
ggsave
(
g
,
filename
=
paste0
(
figpath
,
"Fig5EF_GSEA_Celltype_Hallmarks.png"
),
width
=
length
(
unique
(
res
$
celltype
))
*
0.3
*
length
(
unique
(
res_complete
$
test
)),
height
=
length
(
unique
(
res
$
ID
))
*
0.2
,
limitsize
=
FALSE
)
## 9.3) figure 5g: Monocyte xenograft signature CD34+
### 9.3.1) load wilcoxon test results
w
<-
read.table
(
file
=
paste0
(
path
,
"xenograft_signatures/wilcoxon.txt"
),
sep
=
"\t"
,
header
=
T
)
w
$
significance_asterisk
<-
""
w
$
significance_asterisk
[
w
$
p.adjust
<
0.05
]
<-
"*"
w
$
significance_asterisk
[
w
$
p.adjust
<
0.01
]
<-
"**"
w
$
significance_asterisk
[
w
$
p.adjust
<
0.001
]
<-
"***"
### 9.3.2) plot
g
<-
obj
@
meta.data
%>%
ggplot
()
+
theme_classic
()
+
geom_violin
(
aes
(
x
=
seurat_annotation
,
y
=
VEXAS_Xenograft_sig50
,
fill
=
status
),
scale
=
"width"
)
+
geom_text
(
data
=
w
,
aes
(
x
=
seurat_annotation
,
y
=
0.7
,
label
=
significance_asterisk
))
+
theme
(
plot.title
=
element_text
(
hjust
=
0.5
,
size
=
10
),
axis.text.x
=
element_text
(
angle
=
90
,
vjust
=
0.5
,
hjust
=
1
),
legend.position
=
"right"
)
+
ylab
(
"UCell Module score"
)
+
xlab
(
""
)
+
ggtitle
(
"VEXAS xenograft signature (n = 50)"
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
45
,
,
vjust
=
1
,
hjust
=
1
))
+
scale_fill_manual
(
values
=
adjustcolor
(
col
=
c
(
"#F8766D"
,
"#02818a"
),
alpha.f
=
0.8
),
name
=
""
)
ggsave
(
g
,
filename
=
paste0
(
figpath
,
"Fig5G_UCell_MonocyteXenograft_WilcoxonTest.png"
),
width
=
length
(
unique
(
obj
$
seurat_annotation
))
*
5
*
0.1
,
height
=
5
,
limitsize
=
FALSE
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment