Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
Zonari_mPBHSCexp_2025
Zonari_mPBHSCexp_2025_scRNAseq
Commits
f8a64d30
Commit
f8a64d30
authored
Aug 04, 2025
by
Matteo Barcella
Browse files
script for scGATE annotation of Fig2H dataset using Fig2C dataset
parent
8716d431
Changes
7
Hide whitespace changes
Inline
Side-by-side
Complete_list_of_markers_x_scGATE.rds
0 → 100644
View file @
f8a64d30
File added
Complete_list_of_pctmarkers.rds
0 → 100644
View file @
f8a64d30
File added
WPRE_obj.rds
0 → 100644
View file @
f8a64d30
File added
colpalette_refannots.clustering.rds
0 → 100644
View file @
f8a64d30
File added
markersdir_scGate_input/markers_genes_neg.rds
0 → 100644
View file @
f8a64d30
File added
markersdir_scGate_input/markers_genes_pos.rds
0 → 100644
View file @
f8a64d30
File added
scGate_Fig2C_dataset_to_Fig2H_dataset.R
0 → 100644
View file @
f8a64d30
# Automatic Annotation of Fig2H dataset using scGATE leveraging Fig2C dataset as reference dataset
library
(
ggplot2
)
library
(
dplyr
)
library
(
RColorBrewer
)
library
(
UCell
)
library
(
scGate
)
library
(
viridis
)
library
(
stringr
)
library
(
patchwork
)
library
(
dittoSeq
)
library
(
gridExtra
)
library
(
Seurat
)
library
(
openxlsx
)
Fig2H_dataset
<-
readRDS
(
"Fig2H_dataset.rds"
)
# from pipeline
DefaultAssay
(
Fig2H_dataset
)
<-
"alra"
length
(
Features
(
Fig2H_dataset
))
# 14273
length
(
VariableFeatures
(
Fig2H_dataset
))
# 2141
plotdir
<-
"plots/"
dir.create
(
path
=
plotdir
)
#### SIGNATURES ####
# markers from Fig2C dataset
markers_lists
<-
list
(
level1
=
NULL
,
level2
=
NULL
)
dat01_markers_1
<-
readRDS
(
"Full_GSEA_markers_Population.rds"
)
dat01_markers_2
<-
readRDS
(
"Full_GSEA_markers_Classification.rds"
)
markers_lists
[[
"level1"
]]
<-
dat01_markers_1
markers_lists
[[
"level2"
]]
<-
dat01_markers_2
markers_genes_pos
<-
list
(
level1
=
list
(),
level2
=
list
())
markers_genes_neg
<-
list
(
level1
=
list
(),
level2
=
list
())
thresh.pct
<-
0.2
ntop
<-
15
for
(
lev
in
names
(
markers_lists
)){
markers_lists
[[
lev
]]
$
pctDelta
<-
markers_lists
[[
lev
]]
$
pct.1
-
markers_lists
[[
lev
]]
$
pct.2
for
(
cl
in
unique
(
markers_lists
[[
lev
]]
$
cluster
))
{
tmpdata
<-
subset.data.frame
(
markers_lists
[[
lev
]],
cluster
==
cl
)
# pickup top pos genes according to logFC and threshold
tmp.pos
<-
subset
(
tmpdata
,
subset
=
pctDelta
>
0.2
)
print
(
head
(
tmp.pos
))
markers_genes_pos
[[
lev
]][[
cl
]]
<-
head
(
tmp.pos
[
order
(
tmp.pos
$
avg_log2FC
,
decreasing
=
T
),],
ntop
)
$
gene
# pickup top neg genes according to logFC and threshold
tmp.neg
<-
subset
(
tmpdata
,
subset
=
pctDelta
<
-
0.2
)
markers_genes_neg
[[
lev
]][[
cl
]]
<-
head
(
tmp.neg
[
order
(
tmp.neg
$
avg_log2FC
,
decreasing
=
F
),],
ntop
)
$
gene
}
}
markers_listdir
<-
"markersdir_scGate_input/"
dir.create
(
markers_listdir
)
saveRDS
(
markers_genes_neg
,
file
=
paste0
(
markers_listdir
,
"markers_genes_neg.rds"
))
saveRDS
(
markers_genes_pos
,
file
=
paste0
(
markers_listdir
,
"markers_genes_pos.rds"
))
# Inspecting % of expressing cells related to each set of genes ####
markers_genes
<-
list
(
pos
=
markers_genes_pos
,
neg
=
markers_genes_neg
)
pct_genes
<-
markers_genes
for
(
direction
in
names
(
markers_genes
)){
for
(
annlev
in
names
(
markers_genes
[[
direction
]])){
for
(
cl
in
names
(
markers_genes
[[
direction
]][[
annlev
]])){
pct_genes
[[
direction
]][[
annlev
]][[
cl
]]
<-
NULL
if
(
length
(
markers_genes
[[
direction
]][[
annlev
]][[
cl
]])
>
0
){
for
(
cur.gene
in
markers_genes
[[
direction
]][[
annlev
]][[
cl
]]){
test
<-
FetchData
(
object
=
Fig2H_dataset
,
vars
=
c
(
"Patient"
,
"Timepoint"
,
markers_genes
[[
direction
]][[
annlev
]][[
cl
]]))
pct_genes
[[
direction
]][[
annlev
]][[
cl
]][
cur.gene
]
<-
prop.table
(
table
(
test
[[
cur.gene
]]
>
0
))[
"TRUE"
]
}
}
else
{
next
}
}
}
}
saveRDS
(
object
=
markers_genes
,
"Complete_list_of_markers_x_scGATE.rds"
)
saveRDS
(
object
=
pct_genes
,
"Complete_list_of_pctmarkers.rds"
)
#### Gating model ####
gate_models
<-
markers_genes
gate_models
<-
NULL
gate_models_onlypos
<-
NULL
for
(
direction
in
names
(
markers_genes
)){
for
(
annlev
in
names
(
markers_genes
[[
direction
]])){
for
(
cl
in
names
(
markers_genes
[[
direction
]][[
annlev
]])){
if
(
length
(
markers_genes
[[
direction
]][[
annlev
]][[
cl
]])
>
0
){
gate_models
[[
annlev
]][[
cl
]]
<-
scGate
::
gating_model
(
name
=
paste0
(
direction
,
"_"
,
annlev
,
"_"
,
cl
),
model
=
gate_models
[[
annlev
]][[
cl
]]
,
signature
=
markers_genes
[[
direction
]][[
annlev
]][[
cl
]],
level
=
gsub
(
x
=
annlev
,
pattern
=
"level"
,
replacement
=
""
),
positive
=
ifelse
(
test
=
direction
==
"pos"
,
TRUE
,
FALSE
))
if
(
direction
==
"pos"
){
gate_models_onlypos
[[
annlev
]][[
cl
]]
<-
scGate
::
gating_model
(
name
=
paste0
(
direction
,
"_"
,
annlev
,
"_"
,
cl
),
model
=
gate_models_onlypos
[[
annlev
]][[
cl
]]
,
signature
=
markers_genes
[[
direction
]][[
annlev
]][[
cl
]],
level
=
gsub
(
x
=
annlev
,
pattern
=
"level"
,
replacement
=
""
),
positive
=
ifelse
(
test
=
direction
==
"pos"
,
TRUE
,
FALSE
))
}
else
{
next
}
}
else
{
next
}
}
}
}
Fig2H_dataset.backup
<-
Fig2H_dataset
Fig2H_dataset
<-
scGate
(
data
=
Fig2H_dataset
,
model
=
gate_models
$
level1
)
colnames
(
Fig2H_dataset
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset
@
meta.data
),
pattern
=
"is.pure"
,
replacement
=
"is.pure.level1"
)
colnames
(
Fig2H_dataset
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset
@
meta.data
),
pattern
=
"scGate_multi"
,
replacement
=
"scGate_multi.level1"
)
colnames
(
Fig2H_dataset
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset
@
meta.data
),
pattern
=
"CellOntology_ID"
,
replacement
=
"CellOntology_ID.level1"
)
colnames
(
Fig2H_dataset
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset
@
meta.data
),
pattern
=
"CellOntology_name"
,
replacement
=
"CellOntology_name.level1"
)
Fig2H_dataset
<-
scGate
(
data
=
Fig2H_dataset
,
model
=
gate_models
$
level2
)
colnames
(
Fig2H_dataset
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset
@
meta.data
),
pattern
=
"^is.pure_"
,
perl
=
T
,
replacement
=
"is.pure.level2_"
)
colnames
(
Fig2H_dataset
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset
@
meta.data
),
pattern
=
"^scGate_multi$"
,
perl
=
T
,
replacement
=
"scGate_multi.level2"
)
colnames
(
Fig2H_dataset
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset
@
meta.data
),
pattern
=
"^CellOntology_ID$"
,
perl
=
T
,
replacement
=
"CellOntology_ID.level2"
)
colnames
(
Fig2H_dataset
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset
@
meta.data
),
pattern
=
"^CellOntology_name$"
,
perl
=
T
,
replacement
=
"CellOntology_name.level2"
)
Fig2H_dataset_onlypos
<-
Fig2H_dataset.backup
Fig2H_dataset_onlypos
<-
scGate
(
data
=
Fig2H_dataset_onlypos
,
model
=
gate_models_onlypos
$
level1
)
colnames
(
Fig2H_dataset_onlypos
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset_onlypos
@
meta.data
),
pattern
=
"is.pure"
,
replacement
=
"is.pure.level1"
)
colnames
(
Fig2H_dataset_onlypos
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset_onlypos
@
meta.data
),
pattern
=
"scGate_multi"
,
replacement
=
"scGate_multi.level1"
)
colnames
(
Fig2H_dataset_onlypos
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset_onlypos
@
meta.data
),
pattern
=
"CellOntology_ID"
,
replacement
=
"CellOntology_ID.level1"
)
colnames
(
Fig2H_dataset_onlypos
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset_onlypos
@
meta.data
),
pattern
=
"CellOntology_name"
,
replacement
=
"CellOntology_name.level1"
)
Fig2H_dataset_onlypos
<-
scGate
(
data
=
Fig2H_dataset_onlypos
,
model
=
gate_models_onlypos
$
level2
)
colnames
(
Fig2H_dataset_onlypos
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset_onlypos
@
meta.data
),
pattern
=
"^is.pure_"
,
perl
=
T
,
replacement
=
"is.pure.level2_"
)
colnames
(
Fig2H_dataset_onlypos
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset_onlypos
@
meta.data
),
pattern
=
"^scGate_multi$"
,
perl
=
T
,
replacement
=
"scGate_multi.level2"
)
colnames
(
Fig2H_dataset_onlypos
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset_onlypos
@
meta.data
),
pattern
=
"^CellOntology_ID$"
,
perl
=
T
,
replacement
=
"CellOntology_ID.level2"
)
colnames
(
Fig2H_dataset_onlypos
@
meta.data
)
<-
gsub
(
x
=
colnames
(
Fig2H_dataset_onlypos
@
meta.data
),
pattern
=
"^CellOntology_name$"
,
perl
=
T
,
replacement
=
"CellOntology_name.level2"
)
saveRDS
(
object
=
Fig2H_dataset
,
file
=
"Fig2H_dataset_annotated_with_scGATE_pos_neg_d01.rds"
)
saveRDS
(
object
=
Fig2H_dataset_onlypos
,
file
=
"Fig2H_dataset_annotated_with_scGATE_pos_only_d01.rds"
)
# Visualize gating results
colpalette_lvl1
<-
colorRampPalette
(
brewer.pal
(
12
,
name
=
"Paired"
))(
length
(
unique
(
Fig2H_dataset
@
meta.data
$
scGate_multi.level1
)))
png
(
filename
=
paste0
(
plotdir
,
"scGATE_level1_multi_pos_neg.png"
),
width
=
4
,
height
=
6
,
units
=
"in"
,
res
=
300
)
dittoSeq
::
dittoDimPlot
(
object
=
Fig2H_dataset
,
reduction.use
=
"umap.harmony.pt.tp"
,
var
=
"scGate_multi.level1"
,
opacity
=
0.8
,
do.label
=
T
,
labels.size
=
3.5
,
labels.highlight
=
F
,
labels.repel
=
T
,
size
=
0.6
)
+
scale_color_manual
(
values
=
colpalette_lvl1
)
+
theme
(
legend.position
=
"bottom"
,
plot.title
=
element_blank
(),
axis.line
=
element_line
(
color
=
"black"
),
axis.text
=
element_text
(
size
=
12
,
color
=
"black"
))
dev.off
()
# Re-coding e re-finement of scGATE annotation:
# Provisional annotation: Provv_Annotation variable
Fig2H_dataset_onlypos
$
Provv_Annotation
<-
NA
for
(
i
in
1
:
nrow
(
Fig2H_dataset_onlypos
))
{
scGate_multi
<-
Fig2H_dataset_onlypos
@
meta.data
[[
"scGate_multi.level1"
]][
i
]
if
(
!
is.na
(
scGate_multi
)
&&
scGate_multi
==
"Multi"
)
{
is_pure_HSC
<-
Fig2H_dataset_onlypos
@
meta.data
[[
"is.pure.level1_HSC"
]][
i
]
is_pure_HSPC
<-
Fig2H_dataset_onlypos
@
meta.data
[[
"is.pure.level1_HSPC"
]][
i
]
is_pure_Mye_Lineage
<-
Fig2H_dataset_onlypos
@
meta.data
[[
"is.pure.level1_Mye Lineage"
]][
i
]
is_pure_Mk_Ery_Lineage
<-
Fig2H_dataset_onlypos
@
meta.data
[[
"is.pure.level1_Mk-Ery Lineage"
]][
i
]
hsc_pure
<-
!
is.na
(
is_pure_HSC
)
&&
is_pure_HSC
==
"Pure"
hspc_pure
<-
!
is.na
(
is_pure_HSPC
)
&&
is_pure_HSPC
==
"Pure"
mye_pure
<-
!
is.na
(
is_pure_Mye_Lineage
)
&&
is_pure_Mye_Lineage
==
"Pure"
ery_pure
<-
!
is.na
(
is_pure_Mk_Ery_Lineage
)
&&
is_pure_Mk_Ery_Lineage
==
"Pure"
if
(
!
is.na
(
hsc_pure
)
&&
hsc_pure
&&
!
is.na
(
hspc_pure
)
&&
hspc_pure
)
{
Fig2H_dataset_onlypos
$
Provv_Annotation
[
i
]
<-
"HSC"
}
else
if
(
!
is.na
(
hspc_pure
)
&&
hspc_pure
&&
!
is.na
(
hsc_pure
)
&&
!
hsc_pure
)
{
Fig2H_dataset_onlypos
$
Provv_Annotation
[
i
]
<-
"HSPC"
}
else
if
(
!
is.na
(
hsc_pure
)
&&
hsc_pure
&&
!
is.na
(
mye_pure
)
&&
mye_pure
)
{
Fig2H_dataset_onlypos
$
Provv_Annotation
[
i
]
<-
"Undefined"
}
else
if
(
!
is.na
(
hsc_pure
)
&&
hsc_pure
&&
!
is.na
(
ery_pure
)
&&
ery_pure
)
{
Fig2H_dataset_onlypos
$
Provv_Annotation
[
i
]
<-
"Undefined"
}
else
{
Fig2H_dataset_onlypos
$
Provv_Annotation
[
i
]
<-
"TBD"
}
}
else
{
if
(
!
is.na
(
scGate_multi
))
{
Fig2H_dataset_onlypos
$
Provv_Annotation
[
i
]
<-
scGate_multi
}
else
{
Fig2H_dataset_onlypos
$
Provv_Annotation
[
i
]
<-
"Unknown"
# If NA assign Unknown
}
}
}
# Additional TBD refinement
TBD_ref
<-
FetchData
(
object
=
Fig2H_dataset_onlypos
,
vars
=
c
(
"Provv_Annotation"
,
grep
(
pattern
=
"is.pure.level1_"
,
x
=
colnames
(
Fig2H_dataset_onlypos
@
meta.data
),
value
=
T
),
"scGate_multi.level1"
)
)
library
(
tidyr
)
df_unito
<-
unite
(
TBD_ref
,
comboannot
,
grep
(
pattern
=
"is.pure.level1_"
,
x
=
colnames
(
Fig2H_dataset_onlypos
@
meta.data
),
value
=
T
),
sep
=
""
,
remove
=
TRUE
)
df_unito
$
binary
<-
gsub
(
pattern
=
"Impure"
,
replacement
=
0
,
x
=
df_unito
$
comboannot
)
df_unito
$
binary
<-
gsub
(
pattern
=
"Pure"
,
replacement
=
1
,
x
=
df_unito
$
binary
)
df_unito
$
Provv_Annotation2
<-
df_unito
$
Provv_Annotation
# backup variable
gsub
(
x
=
colnames
(
TBD_ref
)[
2
:
8
],
pattern
=
"is.pure.level1_"
,
replacement
=
""
)
# "HSC" "HSPC" "MonoProg" "Mye Lineage" "MCP" "Mk-Ery Lineage" "Prog-Cycling"
# sharing with cycling goes to other (if single)
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"1000001"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"HSC"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"0010001"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"MonoProg"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"0001001"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Mye Lineage"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"0000101"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"MCP"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"0000011"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Mk-Ery Lineage"
# handling HSC + diff ---- > Undefined
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"1010000"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Undefined"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"1010001"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Undefined"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"1001000"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Undefined"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"1001001"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Undefined"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"1000100"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Undefined"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"1000101"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Undefined"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"1000010"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Undefined"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"1000011"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Undefined"
# handling mixtures mono-ery ---- > Undefined
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"0010010"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Undefined"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"0001010"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Undefined"
# MCP + Ery ---> undef
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"0000110"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Mk-Ery Lineage"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"0000111"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"Mk-Ery Lineage"
# Shared between myeloid lineage and MonoProg -- > myeloid lineage
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"0011000"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"MonoProg"
df_unito
$
Provv_Annotation2
[
which
(
df_unito
$
binary
==
"0011001"
&
df_unito
$
scGate_multi.level1
==
"Multi"
)]
<-
"MonoProg"
df_unito
$
RefinedAnnotation
<-
df_unito
$
Provv_Annotation2
mymeta
<-
df_unito
[,
"RefinedAnnotation"
,
drop
=
F
]
Fig2H_dataset_onlypos
<-
AddMetaData
(
object
=
Fig2H_dataset_onlypos
,
metadata
=
mymeta
)
# Dittoseq with final annotation
pleng
<-
length
(
unique
(
Fig2H_dataset_onlypos
@
meta.data
$
RefinedAnnotation
))
colpalette_refannots
<-
sample
(
brewer.pal
(
12
,
name
=
"Paired"
),
pleng
)
Fig2H_dataset_onlypos
@
meta.data
$
RefinedAnnotation
<-
factor
(
x
=
Fig2H_dataset_onlypos
@
meta.data
$
RefinedAnnotation
,
levels
=
c
(
"HSC"
,
"HSPC"
,
"MonoProg"
,
"Mye Lineage"
,
"MCP"
,
"Mk-Ery Lineage"
,
"Prog-Cycling"
,
"Unknown"
,
"Undefined"
),
labels
=
c
(
"HSC"
,
"HSPC"
,
"MonoProg"
,
"Mye Lineage"
,
"MCP"
,
"Mk-Ery Lineage"
,
"Prog-Cycling"
,
"Unk/Und"
,
"Unk/Und"
))
colpalette3
<-
c
(
"#A6CEE3"
,
"#B2DF8A"
,
"#FDBF6F"
,
"#E78AC3"
,
"#FFFF99"
,
"#E31A1C"
,
"darkgrey"
,
"#C4A484"
)
#,"#D3D3D3")
png
(
filename
=
paste0
(
plotdir
,
"scGATE_RefinedAnnotation.png"
),
width
=
6
,
height
=
6
,
units
=
"in"
,
res
=
300
)
dittoSeq
::
dittoDimPlot
(
object
=
Fig2H_dataset_onlypos
,
reduction.use
=
"umap.harmony.pt.tp"
,
var
=
"RefinedAnnotation"
,
opacity
=
0.8
,
do.label
=
T
,
labels.size
=
6
,
labels.highlight
=
F
,
labels.repel
=
T
,
size
=
1.5
)
+
scale_color_manual
(
values
=
colpalette3
)
+
theme
(
legend.position
=
c
(
0.88
,
0.2
),
legend.title
=
element_blank
(),
legend.key.size
=
unit
(
0.001
,
"in"
),
legend.key.spacing
=
unit
(
0
,
"in"
),
plot.title
=
element_blank
(),
axis.line
=
element_line
(
color
=
"black"
),
axis.text
=
element_text
(
size
=
12
,
color
=
"black"
))
dev.off
()
png
(
filename
=
paste0
(
plotdir
,
"DimPlot_PatientID.png"
),
width
=
6
,
height
=
6
,
units
=
"in"
,
res
=
300
)
dittoSeq
::
dittoDimPlot
(
object
=
Fig2H_dataset_onlypos
,
reduction.use
=
"umap.harmony.pt.tp"
,
var
=
"Patient"
,
opacity
=
1
,
do.label
=
T
,
labels.size
=
3.5
,
labels.highlight
=
F
,
labels.repel
=
T
,
size
=
1
)
+
scale_color_manual
(
values
=
colpalette_refannots
)
+
theme
(
legend.position
=
c
(
0.88
,
0.2
),
legend.title
=
element_blank
(),
legend.key.size
=
unit
(
0.001
,
"in"
),
legend.key.spacing
=
unit
(
0
,
"in"
),
plot.title
=
element_blank
(),
axis.line
=
element_line
(
color
=
"black"
),
axis.text
=
element_text
(
size
=
12
,
color
=
"black"
))
dev.off
()
# Adding WPRE INFO
#### WPRE expression ####
WPRE_obj
<-
readRDS
(
"WPRE_obj.rds"
)
WPRE_raw_counts
<-
FetchData
(
object
=
WPRE_obj
,
vars
=
"WPRE"
)
Fig2H_dataset_onlypos
<-
AddMetaData
(
Fig2H_dataset_onlypos
,
WPRE_raw_counts
)
Fig2H_dataset_onlypos
@
meta.data
$
WPRE_condition
<-
ifelse
(
Fig2H_dataset_onlypos
@
meta.data
[
"WPRE"
]
>
0
,
"WPRE-POS"
,
"WPRE-NEG"
)
Fig2H_dataset_onlypos
@
meta.data
$
WPRE_condition
[
is.na
(
Fig2H_dataset_onlypos
@
meta.data
$
WPRE_condition
)]
<-
"WPRE-NEG"
umap_wpre
<-
dittoSeq
::
dittoDimPlot
(
object
=
Fig2H_dataset_onlypos
,
reduction.use
=
"umap.harmony.pt.tp"
,
order
=
"increasing"
,
var
=
"WPRE_condition"
,
opacity
=
0.8
,
do.label
=
F
,
labels.size
=
4
,
labels.highlight
=
F
,
labels.repel
=
T
,
size
=
1
)
+
scale_color_manual
(
values
=
c
(
"#A0A0A0B3"
,
"#FF3333B3"
))
+
xlab
(
"UMAP_1"
)
+
ylab
(
label
=
"UMAP_2"
)
+
theme
(
legend.position
=
"none"
,
# c(0.16,0.9),
legend.title
=
element_blank
(),
legend.key.size
=
unit
(
0.001
,
"in"
),
legend.key.spacing
=
unit
(
0
,
"in"
),
plot.title
=
element_blank
(),
#axis.line = element_line(color = "black"),
axis.text
=
element_text
(
size
=
16
)
)
# Figure 2I
png
(
filename
=
paste0
(
plotdir
,
"WPRE_positivity.png"
),
width
=
4
,
height
=
4
,
units
=
"in"
,
res
=
300
)
umap_wpre
dev.off
()
pdf
(
file
=
paste0
(
plotdir
,
"WPRE_positivity.pdf"
),
width
=
4
,
height
=
4
)
umap_wpre
dev.off
()
library
(
plyr
)
# Create an empty list to store plots
plots_list
<-
list
()
data_list
<-
list
()
data_list_freq
<-
list
()
# Loop through each unique patient
for
(
patient
in
unique
(
Fig2H_dataset_onlypos
@
meta.data
$
Patient
))
{
# Filter the data for the current patient
patient_data
<-
Fig2H_dataset_onlypos
@
meta.data
[
Fig2H_dataset_onlypos
@
meta.data
$
Patient
==
patient
,
]
patient_data
$
Timepoint
<-
factor
(
patient_data
$
Timepoint
,
levels
=
c
(
"dp"
,
"day4"
,
"day7"
))
# Initialize a list to store results for each population within the patient
WPRE_in_each_pop
<-
list
()
WPRE_in_each_pop_numbers
<-
list
()
# Loop through each unique population in the patient-specific metadata
for
(
pops
in
unique
(
patient_data
$
RefinedAnnotation
))
{
# Create the table for each population and calculate proportions
WPRE_in_each_pop
[[
pops
]]
<-
as.data.frame
(
round
(
prop.table
(
table
(
patient_data
[
patient_data
$
RefinedAnnotation
==
pops
,]
$
WPRE_condition
,
patient_data
[
patient_data
$
RefinedAnnotation
==
pops
,]
$
Timepoint
),
2
)
*
100
,
2
))
WPRE_in_each_pop_numbers
[[
pops
]]
<-
as.data.frame
(
table
(
patient_data
[
patient_data
$
RefinedAnnotation
==
pops
,]
$
WPRE_condition
,
patient_data
[
patient_data
$
RefinedAnnotation
==
pops
,]
$
Timepoint
))
WPRE_in_each_pop_numbers
[[
pops
]]
$
CellPopulation
<-
pops
# Add the population (cluster) label to the dataframe
WPRE_in_each_pop
[[
pops
]]
$
CellPopulation
<-
pops
}
# Combine results across populations for the current patient
df_WPRE_res_num
<-
plyr
::
ldply
(
WPRE_in_each_pop_numbers
,
data.frame
)
df_WPRE_res_num
$
Patient
<-
patient
df_WPRE_res
<-
plyr
::
ldply
(
WPRE_in_each_pop
,
data.frame
)
df_WPRE_res
$
Patient
<-
patient
print
(
head
(
df_WPRE_res
))
# Generate the plot for this patient
data_list_freq
[[
patient
]]
<-
df_WPRE_res
data_list
[[
patient
]]
<-
df_WPRE_res_num
}
# Data file S3 sheets
saveRDS
(
object
=
data_list_freq
,
file
=
"WPRE_freqs_data.rds"
)
saveRDS
(
object
=
data_list
,
file
=
"WPRE_ncells_data.rds"
)
write.xlsx
(
x
=
data_list_freq
,
file
=
"WPRE_freqs_data.xlsx"
)
write.xlsx
(
x
=
data_list
,
file
=
"WPRE_ncells_data.xlsx"
)
saveRDS
(
object
=
Fig2H_dataset.rds_onlypos
,
"Fig2H_dataset_scGATE_WPRE.rds"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment