Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
Bonini_TIGIT2024
Bonini_TIGIT2024_WES
Commits
8f452403
Commit
8f452403
authored
Aug 28, 2024
by
Stefano Beretta
Browse files
Upload New File
parent
946aea14
Changes
1
Show whitespace changes
Inline
Side-by-side
WES_plot_results.R
0 → 100644
View file @
8f452403
library
(
dplyr
)
library
(
ggplot2
)
library
(
RColorBrewer
)
library
(
stringr
)
library
(
e1071
)
library
(
openxlsx
)
library
(
scales
)
library
(
reshape2
)
#' Named colour palette for single-nucleotide variant plots.
#'
#' Every entry is named by the reference base followed by the alternate base
#' (e.g. "CT" for C>T). The four transitions use blue shades; the eight
#' transversions are split into a yellow-green and an orange-red set.
#'
#' @param del If `TRUE`, append four grey shades for point deletions, named
#'   "A*", "C*", "G*" and "T*".
#' @param alpha Opacity passed to `scales::alpha()` and applied to every
#'   colour. The default of 1 leaves the colours opaque.
#' @return A named character vector of colours: 12 substitution entries,
#'   followed by 4 deletion entries when `del = TRUE`.
snv_colors2 <- function(del = FALSE, alpha = 1) {
  nuc <- c("A", "C", "G", "T")

  # Shades 2..5 of a 5-step Brewer palette (the palest shade is skipped),
  # labelled with the given variant names. The opacity comes from the `alpha`
  # argument; it used to be hard-coded to 1, which silently ignored the
  # argument.
  shades <- function(palette, labels) {
    cols <- scales::alpha(RColorBrewer::brewer.pal(5, palette)[2:5], alpha)
    names(cols) <- labels
    cols
  }

  # Transitions
  ts <- shades("Blues", c("CT", "GA", "AG", "TC"))
  # Transversions
  tv1 <- shades("YlGn", c("AC", "TG", "AT", "TA"))
  tv2 <- shades("OrRd", c("CA", "GT", "CG", "GC"))

  var_cols <- c(ts, tv1, tv2)
  if (del) {
    # Point deletions
    var_cols <- c(var_cols, shades("Greys", paste0(nuc, "*")))
  }
  var_cols
}
#####################
### Variant Plots ###
#####################
#' Write per-sample variant summary tables (xlsx) and bar plots (pdf).
#'
#' Only rows whose `CHROM` contains "chr" and is neither "chrY" nor "chrM" are
#' used. All outputs go to `out_dir` and are named `<plot_prefix><suffix>`:
#'   * `_VariantCounts.xlsx` / `_VariantCounts.pdf`: variants per sample.
#'   * `_VariantClassification.xlsx`: counts per sample and REF/ALT pair, with
#'     a TYPE column (SNV / DEL / INS / Other).
#'   * `_VariantType.xlsx`, `_VariantTypes.pdf`, `_VariantTypesPerc.pdf`:
#'     counts and per-sample fractions of each TYPE.
#'   * `_SNVcounts.xlsx`, `_SNVcounts.pdf`, `_SNVcountsPerc.pdf`: counts and
#'     per-sample fractions of each single-base substitution.
#'
#' @param full.t Data frame of variants with at least the columns `Sample`,
#'   `CHROM`, `REF`, `ALT` and the column named by `fill_by`.
#' @param out_dir Output directory; created if it does not exist.
#' @param plot_prefix Prefix of every output file name.
#' @param fill_by Name (a single string) of the column used to group the
#'   counts and to facet the plots.
#' @return Called for its side effects; the value of the final `ggsave()` call
#'   is returned.
plot_variants <- function(full.t, out_dir, plot_prefix, fill_by) {
  dir.create(path = out_dir, showWarnings = FALSE, recursive = TRUE)

  # Full path of an output file: <out_dir>/<plot_prefix><suffix>.
  out_path <- function(suffix) {
    file.path(out_dir, paste0(plot_prefix, suffix))
  }

  # One facet column per value of the `fill_by` column.
  group_facets <- function() {
    facet_grid(
      cols = vars(.data[[fill_by]]), scales = "free_x", space = "free"
    )
  }

  # Chromosome filter shared by every table below (computed once).
  kept <- full.t %>%
    filter(grepl("chr", CHROM)) %>%
    filter(!CHROM %in% c("chrY", "chrM"))

  # ---- Per-sample variant counts ----
  per_sample <- kept %>%
    group_by(Sample, !!!syms(fill_by)) %>%
    summarise(Count = n(), .groups = "drop")
  write.xlsx(
    x = list("NumVariants" = per_sample),
    file = out_path("_VariantCounts.xlsx")
  )
  p <- ggplot(
    per_sample,
    aes(
      x = Sample, y = Count,
      fill = .data[[fill_by]], color = .data[[fill_by]]
    )
  ) +
    theme_bw(base_size = 12) +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
      legend.position = "none"
    ) +
    geom_bar(stat = "identity", alpha = 0.6) +
    scale_y_continuous(labels = scales::comma, n.breaks = 8) +
    xlab("") +
    group_facets()
  ggsave(
    filename = out_path("_VariantCounts.pdf"),
    plot = p, width = 6, height = 6
  )

  # ---- Variant class per REF/ALT pair ----
  # A one-base ALT of "*" is counted as a deletion, not as an SNV.
  per_allele <- kept %>%
    group_by(Sample, !!!syms(fill_by), REF, ALT) %>%
    summarise(Count = n(), .groups = "drop") %>%
    mutate(TYPE = case_when(
      nchar(REF) == 1 & nchar(ALT) == 1 & ALT != "*" ~ "SNV",
      nchar(REF) == 1 & nchar(ALT) == 1 & ALT == "*" ~ "DEL",
      nchar(REF) > nchar(ALT) ~ "DEL",
      nchar(REF) < nchar(ALT) ~ "INS",
      TRUE ~ "Other"
    ))
  write.xlsx(
    x = list("VariantClass" = per_allele),
    file = out_path("_VariantClassification.xlsx")
  )

  # ---- Totals and per-sample fractions of each variant class ----
  per_type <- per_allele %>%
    group_by(Sample, !!!syms(fill_by), TYPE) %>%
    summarise(SumCount = sum(Count), .groups = "drop") %>%
    arrange(desc(SumCount)) %>%
    group_by(Sample, !!!syms(fill_by)) %>%
    mutate(CountPerc = SumCount / sum(SumCount)) %>%
    ungroup()
  write.xlsx(
    x = list("VariantType" = per_type),
    file = out_path("_VariantType.xlsx")
  )

  # Stacked bars of variant classes per sample; the two plots differ only in
  # the y mapping, the y-axis label and the y-axis scale.
  type_barplot <- function(mapping, y_label, y_scale) {
    ggplot(per_type, mapping) +
      theme_bw(base_size = 12) +
      theme(
        axis.text.x = element_text(angle = 30, hjust = 1),
        legend.position = "top"
      ) +
      guides(fill = guide_legend(ncol = 6)) +
      xlab("") +
      ylab(y_label) +
      scale_fill_brewer(palette = "Set1", name = "") +
      scale_color_brewer(palette = "Set1", name = "") +
      geom_bar(stat = "identity", position = "stack", alpha = 0.6) +
      y_scale +
      group_facets()
  }

  p <- type_barplot(
    aes(x = Sample, y = SumCount, fill = TYPE, color = TYPE),
    "Count",
    scale_y_continuous(labels = scales::comma, n.breaks = 8)
  )
  ggsave(
    filename = out_path("_VariantTypes.pdf"),
    plot = p, width = 6, height = 6
  )

  # This axis shows fractions, so it is labelled "Percentage" (it used to be
  # labelled "Count").
  # NOTE(review): accuracy = 2 looks like it rounds labels to the nearest 2%;
  # confirm this is intended.
  p <- type_barplot(
    aes(x = Sample, y = CountPerc, fill = TYPE, color = TYPE),
    "Percentage",
    scale_y_continuous(
      labels = scales::percent_format(accuracy = 2), n.breaks = 10
    )
  )
  ggsave(
    filename = out_path("_VariantTypesPerc.pdf"),
    plot = p, width = 6, height = 6
  )

  # ---- Single-base substitutions ----
  # ALT == "*" is excluded: it is classified as DEL above and has no colour in
  # the del = FALSE palette, so it would otherwise become an NA category and
  # still count towards the per-sample totals.
  snv <- kept %>%
    filter(nchar(REF) == 1 & nchar(ALT) == 1 & ALT != "*") %>%
    group_by(Sample, !!!syms(fill_by), REF, ALT) %>%
    summarise(Count = n(), .groups = "drop") %>%
    group_by(Sample, !!!syms(fill_by)) %>%
    mutate(CountPerc = Count / sum(Count)) %>%
    ungroup()
  snv$Variant <- paste0(snv$REF, snv$ALT)
  snv$Variant <- factor(snv$Variant, levels = sort(unique(snv$Variant)))
  write.xlsx(x = list("SNV" = snv), file = out_path("_SNVcounts.xlsx"))

  # Re-level in reverse palette order so the stacking follows the palette.
  snv_palette <- snv_colors2(del = FALSE)
  snv$Variant <- factor(snv$Variant, levels = rev(names(snv_palette)))

  # Stacked bars of substitution classes per sample.
  snv_barplot <- function(mapping, y_scale) {
    ggplot(snv, mapping) +
      theme_bw(base_size = 12) +
      theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
      geom_bar(stat = "identity", alpha = 0.6) +
      scale_fill_manual(values = snv_palette) +
      scale_color_manual(values = snv_palette) +
      xlab("") +
      y_scale +
      group_facets()
  }

  p2 <- snv_barplot(
    aes(x = Sample, y = Count, fill = Variant, color = Variant),
    scale_y_continuous(labels = scales::comma, n.breaks = 8)
  )
  ggsave(
    filename = out_path("_SNVcounts.pdf"),
    plot = p2, width = 7, height = 7
  )

  p2p <- snv_barplot(
    aes(x = Sample, y = CountPerc, fill = Variant, color = Variant),
    scale_y_continuous(
      labels = scales::percent_format(accuracy = 2), n.breaks = 10
    )
  )
  ggsave(
    filename = out_path("_SNVcountsPerc.pdf"),
    plot = p2p, width = 7, height = 7
  )
}
###############
### General ###
###############
# Change directory
# Directory that holds the input workbook; it is also handed to
# plot_variants() as the output directory.
out_dir <- "."

# Read the "Full_NoMulti_noGerm" sheet of the WES results workbook.
full.t.nomulti.noGerm <- read.xlsx(
  file.path(out_dir, "WES_results.xlsx"),
  sheet = "Full_NoMulti_noGerm"
)

# Per-sample variant tables and plots, grouped and faceted by the Donor column.
plot_variants(
  full.t.nomulti.noGerm,
  out_dir,
  plot_prefix = "WES_results",
  fill_by = "Donor"
)
# Count variants per Donor / Group for the HIGH, MODERATE and LOW snpEff
# impact levels. The counts are then cast to wide form (one column per impact
# level) and melted back to long form, giving the columns Donor, Group,
# `variable` (impact level) and `value` (count).
snpeff_df <- full.t.nomulti.noGerm %>%
  filter(snpEff_Impact %in% c("HIGH", "MODERATE", "LOW")) %>%
  group_by(Donor, Group, snpEff_Impact) %>%
  summarise(Count = n()) %>%
  reshape2::dcast(Donor + Group ~ snpEff_Impact, value.var = "Count") %>%
  reshape2::melt(id.vars = c("Donor", "Group"))

# Fix the order of the impact levels (used for the legend and the stacking).
snpeff_df[["variable"]] <- factor(
  snpeff_df[["variable"]],
  levels = c("HIGH", "MODERATE", "LOW")
)

write.xlsx(
  list("VarClassification" = snpeff_df),
  file = file.path(out_dir, "WES_results_Variant_Classification.xlsx")
)

# Stacked bars of counts per Group, one facet column per Donor.
p <- ggplot(
  snpeff_df,
  aes(x = Group, y = value, fill = variable, color = variable)
) +
  geom_bar(stat = "identity", alpha = 0.6) +
  facet_grid(. ~ Donor) +
  scale_fill_brewer(palette = "Set1", name = "Classification") +
  scale_color_brewer(palette = "Set1", name = "Classification") +
  labs(x = "", y = "Count", title = "Variant Classification") +
  theme_bw(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggsave(
  file.path(out_dir, "WES_results_Variant_Classification.pdf"),
  plot = p,
  width = 8,
  height = 6
)
##################
### Gene Lists ###
##################
# Read the "GeneLists_Variants" sheet of the gene-list workbook.
full.circos <- read.xlsx(
  file.path(out_dir, "WES_results_GeneLists.xlsx"),
  sheet = "GeneLists_Variants"
)

# One point per row: gene (y) against sample (x), sized by VAF and coloured by
# TYPE. Rows are faceted by GL and columns by Group. Points are jittered
# horizontally only (height = 0).
p <- ggplot(
  full.circos,
  aes(Sample, snpEff_GeneName, size = VAF, color = TYPE)
) +
  geom_jitter(height = 0, alpha = 0.5) +
  facet_grid(GL ~ Group, scales = "free", space = "free") +
  scale_color_brewer(palette = "Set1", name = "Type") +
  # Draw the colour legend keys at a fixed size of 5.
  guides(color = guide_legend(override.aes = list(size = 5))) +
  labs(x = "", y = "") +
  theme_bw(base_size = 10) +
  theme(
    strip.text.y = element_text(angle = 0, hjust = 0, size = 12),
    strip.background.y = element_rect(fill = "white", color = "white")
  )

ggsave(
  file.path(out_dir, "WES_results_GeneLists.pdf"),
  plot = p,
  width = 10,
  height = 12
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment