Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
LetteraScala_Ageing
bulk_ATACseq
Commits
bbe3ab96
Commit
bbe3ab96
authored
Dec 05, 2025
by
Teresa Tavella
Browse files
plot
parent
9bc178b7
Changes
1
Hide whitespace changes
Inline
Side-by-side
parse_motif.R
0 → 100644
View file @
bbe3ab96
library
(
tidyverse
)
library
(
VennDiagram
)
library
(
pheatmap
)
library
(
ggplot2
)
library
(
dplyr
)
library
(
readr
)
# parse HOMER known results
parse_homer_known
<-
function
(
file_path
)
{
df
<-
file_path
colnames
(
df
)
<-
str_trim
(
colnames
(
df
))
df
<-
df
%>%
mutate
(
TF_Name
=
str_extract
(
`Motif_Name`
,
"^[^(]+"
),
TF_Family
=
str_extract
(
`Motif_Name`
,
"(?<=\\()[^)]+(?=\\))"
)
)
%>%
replace_na
(
list
(
TF_Family
=
"Unknown"
))
df
$
`Percent_Target`
<-
as.numeric
(
gsub
(
'%'
,
' '
,
df
$
`Percent_Target`
))
df
$
`Percent_Background`
<-
as.numeric
(
gsub
(
'%'
,
' '
,
df
$
`Percent_Background`
))
return
(
df
)
}
wdir
<-
"/beegfs/scratch/ric.dimicco/ric.dimicco/DiMiccoR_EL_90-1049491329_ATACseq/results/05-filter_qual_bos_qual25/multibamc"
#results motif
cnames
<-
c
(
'Motif_Name'
,
'Consensus'
,
'P_value'
,
'Log_P_value'
,
'q_value_Benjamini'
,
'# of Target Sequences with Motif(of 1635)'
,
'Percent_Target'
,
'# of Background Sequences with Motif(of 97946)'
,
'Percent_Background'
)
#'TF_Name', 'TF_Family')
open
<-
read.table
(
paste
(
wdir
,
'Motifs-Open_DARs.sorted.bed'
,
'knownResults.txt'
,
sep
=
'/'
),
row.names
=
NULL
,
sep
=
'\t'
,
skip
=
1
)
colnames
(
open
)
<-
cnames
close
<-
read.table
(
paste
(
wdir
,
'Motifs-Close_DARs.sorted.bed'
,
'knownResults.txt'
,
sep
=
'/'
),
row.names
=
NULL
,
sep
=
'\t'
,
skip
=
1
)
colnames
(
close
)
<-
cnames
## add family to df
df_motif
<-
list
(
'open'
=
open
,
'close'
=
close
)
## add motif family name
df
<-
list
()
for
(
n
in
names
(
df_motif
))
{
df
[[
n
]]
<-
parse_homer_known
(
df_motif
[[
n
]])
}
#Extract TF name and family
open
<-
df
$
open
close
<-
df
$
close
# comparison function for Open vs Close DARs
compare_open_close_dars
<-
function
(
open_file
,
close_file
,
pval_threshold
=
0.05
)
{
open
<-
open_file
open
$
dataset
<-
'Open_DARs'
close
<-
close_file
close
$
dataset
<-
'Close_DARs'
open_sig
<-
open
%>%
dplyr
::
filter
(
P_value
<
pval_threshold
)
close_sig
<-
close
%>%
dplyr
::
filter
(
P_value
<
pval_threshold
)
# Get motif
open_motifs
<-
open_sig
$
Motif_Name
close_motifs
<-
close_sig
$
Motif_Name
# overlaps
shared_motifs
<-
intersect
(
open_motifs
,
close_motifs
)
open_unique
<-
setdiff
(
open_motifs
,
close_motifs
)
close_unique
<-
setdiff
(
close_motifs
,
open_motifs
)
summary_stats
<-
list
(
open_significant
=
length
(
open_motifs
),
close_significant
=
length
(
close_motifs
),
shared
=
length
(
shared_motifs
),
open_unique
=
length
(
open_unique
),
close_unique
=
length
(
close_unique
)
)
if
(
length
(
shared_motifs
)
>
0
)
{
shared_comparison
<-
open_sig
%>%
filter
(
Motif_Name
%in%
shared_motifs
)
%>%
dplyr
::
select
(
Motif_Name
,
TF_Name
,
TF_Family
,
open_pval
=
P_value
,
open_logpval
=
Log_P_value
,
open_percent
=
Percent_Target
)
%>%
left_join
(
close_sig
%>%
filter
(
Motif_Name
%in%
shared_motifs
)
%>%
dplyr
::
select
(
Motif_Name
,
close_pval
=
P_value
,
close_logpval
=
Log_P_value
,
close_percent
=
Percent_Target
),
by
=
"Motif_Name"
)
%>%
mutate
(
logpval_diff
=
close_logpval
-
open_logpval
,
percent_diff
=
close_percent
-
open_percent
,
enrichment_bias
=
case_when
(
abs
(
close_logpval
)
>
abs
(
open_logpval
)
&
abs
(
logpval_diff
)
>
0.5
~
"Close-enriched"
,
abs
(
open_logpval
)
>
abs
(
close_logpval
)
&
abs
(
logpval_diff
)
>
0.5
~
"Open-enriched"
,
TRUE
~
"Similar"
)
)
}
else
{
shared_comparison
<-
tibble
()
}
all_sig_combined
<-
bind_rows
(
open_sig
,
close_sig
)
# results
list
(
summary
=
summary_stats
,
open_significant
=
open_sig
,
close_significant
=
close_sig
,
all_significant
=
all_sig_combined
,
shared_detailed
=
shared_comparison
,
shared_motifs
=
shared_motifs
,
open_unique_motifs
=
open_unique
,
close_unique_motifs
=
close_unique
)
}
rex
<-
compare_open_close_dars
(
open
,
close
,
pval_threshold
=
0.05
)
motif_data
<-
rex
$
shared_detailed
md
<-
motif_data
%>%
dplyr
::
select
(
TF_Name
,
open_logpval
,
close_logpval
,
enrichment_bias
,
percent_diff
)
%>%
pivot_longer
(
cols
=
c
(
open_logpval
,
close_logpval
),
names_to
=
"condition"
,
values_to
=
"logpval"
)
%>%
mutate
(
condition
=
ifelse
(
condition
==
"open_logpval"
,
"Open DARs"
,
"Close DARs"
),
logpval
=
abs
(
logpval
))
md
<-
data.frame
(
md
)
md
$
enrichment_bias
<-
factor
(
md
$
enrichment_bias
,
levels
=
c
(
"Open-enriched"
,
"Similar"
,
"Close-enriched"
))
md
<-
md
[
order
(
md
$
enrichment_bias
),]
md
$
TF_Name
<-
factor
(
md
$
TF_Name
,
levels
=
unique
(
md
$
TF_Name
))
mx
<-
md
%>%
group_by
(
enrichment_bias
)
%>%
arrange
(
enrichment_bias
,
logpval
)
l
<-
data.frame
()
for
(
n
in
unique
(
md
$
TF_Name
))
{
val
<-
md
[
md
$
TF_Name
==
n
,]
val
<-
val
[
val
$
logpval
==
max
(
val
$
logpval
),
]
if
(
nrow
(
l
)
==
0
)
{
l
<-
val
}
else
{
l
<-
rbind
(
l
,
val
)
}
}
l
<-
l
%>%
group_by
(
enrichment_bias
)
%>%
arrange
(
enrichment_bias
,
logpval
)
o
<-
subset
(
l
,
enrichment_bias
==
'Open-enriched'
)
final
<-
rbind
(
o
[
order
(
o
$
logpval
,
decreasing
=
TRUE
),],
subset
(
l
,
enrichment_bias
!=
'Open-enriched'
))
md
$
TF_Name
<-
factor
(
md
$
TF_Name
,
levels
=
final
$
TF_Name
)
md
$
condition
<-
ifelse
(
md
$
condition
==
'Open DARs'
,
'Open in High'
,
'Open in Low'
)
md
$
TF_Name
<-
reverse
(
md
$
TF_Name
)
plot1
<-
ggplot
(
md
,
aes
(
x
=
TF_Name
,
y
=
logpval
,
fill
=
condition
))
+
geom_col
(
position
=
'dodge'
,
width
=
0.7
,
color
=
'black'
)
+
scale_fill_manual
(
values
=
c
(
"Open in High"
=
"#0000ffff"
,
"Open in Low"
=
"#e74c3c"
))
+
coord_flip
()
+
geom_hline
(
yintercept
=
-
log
(
0.05
),
linetype
=
"dashed"
,
color
=
'red'
)
+
labs
(
title
=
""
,
#CORE Transcription Factor Binding sites enriched from Motif analysis",
subtitle
=
"HIGH vs Low Dose HSC Transplant"
,
x
=
""
,
y
=
"-Log P Value Enrichment"
,
fill
=
""
)
+
theme_minimal
()
+
theme
(
#
text
=
element_text
(
size
=
20
,
family
=
"Arial"
),
plot.title
=
element_text
(
size
=
20
,
face
=
"bold"
,
hjust
=
0.5
,
margin
=
margin
(
b
=
20
)),
axis.title.y
=
element_text
(
size
=
20
,
face
=
"bold"
,
family
=
"Arial"
),
axis.text.x
=
element_text
(
size
=
20
,
color
=
'black'
),
axis.text.y
=
element_text
(
size
=
35
,
color
=
'black'
,
family
=
"Arial"
,
angle
=
-45
,
hjust
=
1
,
vjust
=
1
),
legend.position
=
'right'
,
legend.title
=
element_text
(
size
=
18
,
face
=
"bold"
),
legend.text
=
element_text
(
size
=
16
),
legend.key.size
=
unit
(
1
,
"cm"
),
legend.key.width
=
unit
(
1.5
,
"cm"
),
legend.margin
=
margin
(
b
=
15
),
legend.box.spacing
=
unit
(
0.5
,
"cm"
),
panel.grid.major
=
element_blank
(),
panel.grid.minor
=
element_blank
(),
axis.line
=
element_line
(
colour
=
"black"
,
size
=
0.5
,
linetype
=
"solid"
),
axis.ticks
=
element_line
(
colour
=
"black"
,
size
=
0.5
),
axis.ticks.length
=
unit
(
0.25
,
"cm"
),
strip.placement
=
"outside"
)
+
scale_y_continuous
(
expand
=
c
(
0
,
0
),
position
=
"right"
)
#ggsave("motif_coreenrichment_region_open_vs_close_sorted_v2.svg", plot = plot1, width = 10, height = 14, units = "in")
#############################
dx
<-
rex
$
all_significant
dx_subset
<-
dx
[
dx
$
`Motif_Name`
%in%
unique
(
c
(
rex
$
open_unique_motifs
,
rex
$
close_unique_motifs
)),]
dx_subset
$
TF_Family
<-
factor
(
dx_subset
$
TF_Family
,
levels
=
rev
(
unique
(
dx_subset
$
TF_Family
)
))
dx_subset
$
TF_Name
<-
ifelse
(
duplicated
(
dx_subset
$
TF_Name
),
paste0
(
dx_subset
$
TF_Name
,
'-'
,
dx_subset
$
TF_Family
)
,
dx_subset
$
TF_Name
)
top20
<-
dx_subset
%>%
group_by
(
dataset
)
%>%
slice_head
(
n
=
20
)
%>%
mutate
(
rank
=
row_number
())
#############################
## load data with annotation
#############################
library
(
readxl
)
top_results_close
<-
data.frame
(
read_excel
(
'/beegfs/scratch/ric.dimicco/ric.dimicco/DiMiccoR_EL_90-1049491329_ATACseq/results/05-filter_qual_bos_qual25/multibamc/Top_20_Motif_analysis_annotated.xls'
,
sheet
=
'Open_in_Low'
))
colnames
(
top_results_close
)[
14
]
<-
'Annotation'
top_results_open
<-
data.frame
(
read_excel
(
'results/05-filter_qual_bos_qual25/multibamc/Top_20_Motif_analysis_annotated.xls'
,
sheet
=
'Open_in_High'
))
colnames
(
top_results_open
)[
14
]
<-
'Annotation'
top20_annotated
<-
left_join
(
top20
,
top_results_close
[,
c
(
'TF_Name'
,
'Annotation'
)],
by
=
'TF_Name'
)
top20_annotated_all
<-
left_join
(
top20_annotated
,
top_results_open
[,
c
(
'TF_Name'
,
'Annotation'
)],
by
=
'TF_Name'
)
top
<-
data.frame
(
top20_annotated_all
)
top
$
Annotation
<-
ifelse
(
is.na
(
top
$
Annotation.x
),
top
$
Annotation.y
,
top
$
Annotation.x
)
my_colors
<-
c
(
"HSC stemness"
=
"#a6cee3"
,
"HSC self-renewal"
=
"#33a02c"
,
"HSC homeostasis"
=
"#ffd92f"
,
"HSC senescence"
=
"#bdbdbd"
,
"Oxidative stress response"
=
"#b2df8a"
,
"Interferon signaling"
=
"#e31a1c"
,
"Myeloid committment"
=
"#1f78b4"
,
"Myegakaryo/erythroid committment"
=
"#ff7f00"
,
"Myeloid/erythroid committment"
=
"#6a3d9a"
,
"Lymphoid committment"
=
"#fb9a99"
)
top
$
Dataset
<-
ifelse
(
top
$
dataset
==
'Close_DARs'
,
'Open in Low'
,
'Open in High'
)
p
<-
ggplot
(
top
,
aes
(
x
=
reorder
(
TF_Name
,
Log_P_value
),
y
=-
Log_P_value
))
+
geom_bar
(
aes
(
fill
=
Annotation
),
stat
=
"identity"
,
position
=
"dodge"
,
alpha
=
0.7
,
size
=
2
)
+
facet_wrap
(
~
Dataset
,
scales
=
'free_x'
,
nrow
=
2
)
+
#scales = 'free',
theme_minimal
()
+
scale_fill_manual
(
values
=
my_colors
)
+
theme
(
legend.position
=
'right'
,
plot.title
=
element_text
(
hjust
=
0.5
),
text
=
element_text
(
size
=
14
),
panel.grid.major
=
element_blank
(),
panel.grid.minor
=
element_blank
(),
axis.title.x
=
element_blank
(),
plot.margin
=
margin
(
t
=
20
,
r
=
10
,
b
=
10
,
l
=
10
,
unit
=
'pt'
),
axis.line
=
element_line
(
colour
=
"black"
,
size
=
0.3
,
linetype
=
"solid"
),
axis.text.x
=
element_text
(
size
=
12
,
color
=
'black'
,
angle
=
45
,
hjust
=
1
),
axis.text.y
=
element_text
(
size
=
12
,
color
=
'black'
),
strip.placement
=
"outside"
,
)
+
geom_hline
(
yintercept
=
-
log
(
0.05
),
linetype
=
"dashed"
,
color
=
"red"
)
+
labs
(
title
=
""
,
subtitle
=
""
,
y
=
"-Log P Value Enrichment"
,
fill
=
""
)
+
guides
(
fill
=
guide_legend
(
ncol
=
1
))
ggsave
(
"motif_enrichment_region_open_vs_close_unique_analysis_annotation_v2.svg"
,
plot
=
p
,
width
=
9
,
height
=
5
,
units
=
"in"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment