Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
Zonari_mPBHSCexp_2025
Zonari_mPBHSCexp_2025_BulkRNAseq
Commits
5b7884f3
Commit
5b7884f3
authored
Aug 04, 2025
by
Matteo Barcella
Browse files
upload DGE script
parent
2cf381d4
Changes
1
Show whitespace changes
Inline
Side-by-side
DGE/DGE.R
0 → 100644
View file @
5b7884f3
# DEGs analysis customized for 1vs1
# 72h vs 24h
library
(
edgeR
)
library
(
ggplot2
)
library
(
dplyr
)
library
(
openxlsx
)
library
(
ggrepel
)
library
(
biomaRt
)
cur.dir
<-
"DGE/"
data
=
read.table
(
"featureCounts_results.txt"
,
header
=
T
,
row.names
=
1
)
colnames
(
data
)
<-
gsub
(
x
=
gsub
(
x
=
colnames
(
data
),
pattern
=
"_Aligned.sortedByCoord.out.bam"
,
replacement
=
""
),
pattern
=
"X.beegfs.scratch.ric.gentner.ric.gentner.HSC.Exp2155.results.03.aln."
,
replacement
=
""
)
data
<-
data
[,
6
:
ncol
(
data
)]
bcv
<-
0.1
# i campioni in PCA sul totale sembrano molto vicini, quindi imposto bassa dispersione
counts
<-
data
comparisons
<-
list
(
T172h_vs_T124h_rd062
=
c
(
"TI72h_rd062"
,
"TI24h_rd062"
),
T172h_vs_T124h_202205
=
c
(
"TI72h_202205"
,
"TI24h_202205"
)
)
normalized_data
<-
list
()
# filtering out very low counts genes
comparisons_results
<-
list
()
for
(
i
in
names
(
comparisons
)){
if
(
i
==
"T172h_vs_T124h_202205"
){
bcv
<-
0.3
}
else
{
bcv
<-
0.1
}
ids
<-
comparisons
[[
i
]]
counts.ss
<-
subset
(
x
=
counts
,
select
=
ids
)
rsums
<-
rowSums
(
counts.ss
)
counts.ss
<-
subset
(
counts.ss
,
subset
=
rsums
>
31
)
# at least 30 counts as sum in the 2
y
<-
DGEList
(
counts
=
counts.ss
,
group
=
ids
)
TMM
<-
calcNormFactors
(
y
,
method
=
"TMM"
)
normdata
<-
cpm
(
y
=
TMM
,
normalized.lib.sizes
=
TRUE
)
TMM
<-
exactTest
(
TMM
,
dispersion
=
bcv
^
2
,
pair
=
rev
(
ids
))
table
(
p.adjust
(
TMM
$
table
$
PValue
,
method
=
"BH"
)
<
0.05
)
DGEresults
<-
TMM
$
table
DGEresults
$
FDR
<-
p.adjust
(
DGEresults
$
PValue
,
method
=
"BH"
)
DGEresults
$
isDEG
<-
DGEresults
$
FDR
<
0.05
# retrieve counts from the counts table
DGEresults_with_counts_tmp
<-
merge.data.frame
(
DGEresults
,
counts.ss
,
by
=
0
,
all.x
=
T
,
sort
=
F
)
rownames
(
DGEresults_with_counts_tmp
)
<-
DGEresults_with_counts_tmp
$
Row.names
DGEresults_with_counts_tmp
$
Row.names
<-
NULL
comparison.dir
<-
paste0
(
cur.dir
,
"/"
,
i
,
"/"
)
dir.create
(
comparison.dir
,
recursive
=
T
,
showWarnings
=
F
)
volcanodir
<-
paste0
(
comparison.dir
,
"/volcano/"
)
dir.create
(
volcanodir
,
recursive
=
T
,
showWarnings
=
F
)
saveRDS
(
object
=
DGEresults_with_counts_tmp
,
file
=
paste0
(
comparison.dir
,
"edgeR-"
,
ids
[
1
],
"-"
,
ids
[
2
],
".rds"
))
write.table
(
x
=
DGEresults_with_counts_tmp
,
file
=
paste0
(
comparison.dir
,
"edgeR-"
,
ids
[
1
],
"-"
,
ids
[
2
],
".txt"
),
sep
=
"\t"
,
row.names
=
T
)
comparisons_results
[[
i
]]
<-
DGEresults_with_counts_tmp
colnames
(
normdata
)
<-
paste0
(
colnames
(
normdata
),
".cpm"
)
normalized_data
[[
i
]]
<-
normdata
# Volcano plot + goi in all comparisons ####
logfc.thr
<-
0.6
adj.pval.thr
<-
0.05
ntop
<-
15
df
<-
DGEresults_with_counts_tmp
df
$
gene
<-
rownames
(
DGEresults_with_counts_tmp
)
df
$
FDR
<-
-
log10
(
df
$
FDR
)
print
(
head
(
df
))
df
<-
mutate
(
df
,
color
=
case_when
(
df
$
logFC
>
logfc.thr
&
df
$
FDR
>
-
log10
(
adj.pval.thr
)
~
"Overexpressed"
,
df
$
logFC
<
-
logfc.thr
&
df
$
FDR
>
-
log10
(
adj.pval.thr
)
~
"Underexpressed"
,
abs
(
df
$
logFC
)
<
logfc.thr
|
df
$
FDR
<
-
log10
(
adj.pval.thr
)
~
"NonSignificant"
)
)
df
<-
df
[
order
(
df
$
logFC
,
decreasing
=
TRUE
),]
under
<-
tail
(
subset
(
x
=
df
,
subset
=
color
==
"Underexpressed"
),
ntop
)
over
<-
head
(
subset
(
x
=
df
,
subset
=
color
==
"Overexpressed"
),
ntop
)
#genesofinterest <- subset(x = df, subset = color %in% c("Overexpressed","Underexpressed") & gene %in% goi)
hightext
<-
rbind
(
under
,
over
)
max_x
<-
ceiling
(
max
(
abs
(
df
$
logFC
)))
max_y
<-
ceiling
(
max
(
abs
(
na.omit
(
df
$
FDR
))))
vol
<-
ggplot
(
df
,
aes
(
x
=
logFC
,
y
=
FDR
,
colour
=
color
))
+
#ggtitle(paste(c1, "vs.", c2, sep = " ")) +
theme_bw
(
base_size
=
12
)
+
theme
(
legend.position
=
"right"
)
+
xlim
(
c
(
-
max_x
,
max_x
))
+
ylim
(
c
(
0
,
max_y
))
+
#xlab(paste("log2(", paste(c1, c2, sep = " / "), ")", sep = "")) +
xlab
(
"log2 Fold Change"
)
+
ylab
(
expression
(
-
log
[
10
](
"adjusted p-value"
)))
+
labs
(
title
=
i
)
+
geom_hline
(
yintercept
=
-
log10
(
adj.pval.thr
),
colour
=
"darkgray"
)
if
(
logfc.thr
>
0
)
{
vol
<-
vol
+
geom_vline
(
xintercept
=
logfc.thr
,
colour
=
"darkgray"
)
+
geom_vline
(
xintercept
=
-
logfc.thr
,
colour
=
"darkgray"
)
}
vol
<-
vol
+
geom_point
(
size
=
1
,
alpha
=
0.8
,
na.rm
=
T
)
+
scale_color_manual
(
name
=
"Expression"
,
values
=
c
(
Overexpressed
=
"#CD4F39"
,
Underexpressed
=
"#0B5394"
,
NonSignificant
=
"darkgray"
))
+
geom_label_repel
(
data
=
hightext
,
aes
(
label
=
gene
),
show.legend
=
FALSE
)
+
#geom_label_repel(data = genesofinterest, aes(label = gene, fill = color), show.legend = FALSE) +
theme
(
legend.position
=
c
(
0.5
,
0.9
),
plot.title
=
element_text
(
hjust
=
0.5
))
#scale_y_continuous(trans = "log2")
ggsave
(
filename
=
paste
(
volcanodir
,
paste
(
i
,
"volcano.png"
,
sep
=
"-"
),
sep
=
"/"
),
plot
=
vol
,
width
=
10
,
height
=
9
,
dpi
=
600
)
}
significant
<-
list
()
for
(
k
in
names
(
comparisons_results
))
{
significant
[[
paste0
(
k
,
"_UP"
)]]
<-
subset
(
comparisons_results
[[
k
]],
subset
=
isDEG
==
T
&
logFC
>
0
)
significant
[[
paste0
(
k
,
"_DOWN"
)]]
<-
subset
(
comparisons_results
[[
k
]],
subset
=
isDEG
==
T
&
logFC
<
0
)
significant
[[
paste0
(
k
,
"_UP"
)]]
<-
rownames
(
significant
[[
paste0
(
k
,
"_UP"
)]])
significant
[[
paste0
(
k
,
"_DOWN"
)]]
<-
rownames
(
significant
[[
paste0
(
k
,
"_DOWN"
)]])
}
# Connect to the database
ensembl
=
useMart
(
"ensembl"
,
dataset
=
"hsapiens_gene_ensembl"
)
# Define the attributes to retrieve
attributes
=
c
(
"external_gene_name"
,
"description"
)
# Retrieve the information
gene_info
=
getBM
(
attributes
,
mart
=
ensembl
)
comparisons_results
[[
"intersection_sign_concordant_UP"
]]
<-
as.data.frame
(
intersect
(
significant
$
T172h_vs_T124h_rd062_UP
,
significant
$
T172h_vs_T124h_202205_UP
))
colnames
(
comparisons_results
[[
"intersection_sign_concordant_UP"
]])
<-
"Concordant_UP"
rownames
(
comparisons_results
[[
"intersection_sign_concordant_UP"
]])
<-
comparisons_results
[[
"intersection_sign_concordant_UP"
]]
$
Concordant_UP
tmpdf
<-
comparisons_results
[[
"intersection_sign_concordant_UP"
]]
tmpdf.annot
<-
merge
(
tmpdf
,
gene_info
,
by.x
=
"Concordant_UP"
,
by.y
=
"external_gene_name"
,
all.x
=
T
,
sort
=
F
)
tmpdf.annot
<-
merge
(
tmpdf.annot
,
comparisons_results
$
T172h_vs_T124h_rd062
[,
c
(
1
,
6
:
7
)],
by.x
=
"Concordant_UP"
,
by.y
=
0
,
all.x
=
T
,
sort
=
F
)
colnames
(
tmpdf.annot
)[
3
]
<-
"logFC_rd062"
tmpdf.annot
<-
merge
(
tmpdf.annot
,
comparisons_results
$
T172h_vs_T124h_202205
[,
c
(
1
,
6
:
7
)],
by.x
=
"Concordant_UP"
,
by.y
=
0
,
all.x
=
T
,
sort
=
F
)
colnames
(
tmpdf.annot
)[
6
]
<-
"logFC_202205"
tmpdf.annot
<-
merge.data.frame
(
tmpdf.annot
,
normalized_data
$
T172h_vs_T124h_rd062
,
by.x
=
"Concordant_UP"
,
by.y
=
0
,
all.x
=
T
,
sort
=
F
)
tmpdf.annot
<-
merge.data.frame
(
tmpdf.annot
,
normalized_data
$
T172h_vs_T124h_202205
,
by.x
=
"Concordant_UP"
,
by.y
=
0
,
all.x
=
T
,
sort
=
F
)
comparisons_results
[[
"intersection_sign_concordant_UP"
]]
<-
tmpdf.annot
comparisons_results
[[
"intersection_sign_concordant_DW"
]]
<-
as.data.frame
(
intersect
(
significant
$
T172h_vs_T124h_rd062_DOWN
,
significant
$
T172h_vs_T124h_202205_DOWN
))
colnames
(
comparisons_results
[[
"intersection_sign_concordant_DW"
]])
<-
"Concordant_DOWN"
rownames
(
comparisons_results
[[
"intersection_sign_concordant_DW"
]])
<-
comparisons_results
[[
"intersection_sign_concordant_DW"
]]
$
Concordant_DOWN
tmpdf
<-
comparisons_results
[[
"intersection_sign_concordant_DW"
]]
tmpdf.annot
<-
merge
(
tmpdf
,
gene_info
,
by.x
=
"Concordant_DOWN"
,
by.y
=
"external_gene_name"
,
all.x
=
T
,
sort
=
F
)
tmpdf.annot
<-
merge
(
tmpdf.annot
,
comparisons_results
$
T172h_vs_T124h_rd062
[,
c
(
1
,
6
:
7
)],
by.x
=
"Concordant_DOWN"
,
by.y
=
0
,
all.x
=
T
,
sort
=
F
)
colnames
(
tmpdf.annot
)[
3
]
<-
"logFC_rd062"
tmpdf.annot
<-
merge
(
tmpdf.annot
,
comparisons_results
$
T172h_vs_T124h_202205
[,
c
(
1
,
6
:
7
)],
by.x
=
"Concordant_DOWN"
,
by.y
=
0
,
all.x
=
T
,
sort
=
F
)
colnames
(
tmpdf.annot
)[
6
]
<-
"logFC_202205"
tmpdf.annot
<-
merge.data.frame
(
tmpdf.annot
,
normalized_data
$
T172h_vs_T124h_rd062
,
by.x
=
"Concordant_DOWN"
,
by.y
=
0
,
all.x
=
T
,
sort
=
F
)
tmpdf.annot
<-
merge.data.frame
(
tmpdf.annot
,
normalized_data
$
T172h_vs_T124h_202205
,
by.x
=
"Concordant_DOWN"
,
by.y
=
0
,
all.x
=
T
,
sort
=
F
)
comparisons_results
[[
"intersection_sign_concordant_DW"
]]
<-
tmpdf.annot
saveRDS
(
comparisons_results
,
file
=
"Comparisons_exp1_1vs1.rds"
)
write.xlsx
(
comparisons_results
,
file
=
"Comparisons_exp1_1vs1.xlsx"
,
asTable
=
T
,
rowNames
=
T
,
overwrite
=
T
,
firstRow
=
T
)
saveRDS
(
normalized_data
,
file
=
"SizeFactors_norm_cpm_exp1_1vs1.rds"
)
normalized_data
$
T172h_vs_T124h_rd062
<-
as.data.frame
(
normalized_data
$
T172h_vs_T124h_rd062
)
normalized_data
$
T172h_vs_T124h_202205
<-
as.data.frame
(
normalized_data
$
T172h_vs_T124h_202205
)
write.xlsx
(
normalized_data
,
file
=
"SizeFactors_norm_cpm_exp1_1vs1.xlsx"
,
asTable
=
T
,
rowNames
=
T
,
overwrite
=
T
,
firstRow
=
T
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment