Commit 1ca1041f authored by Marco Monti's avatar Marco Monti
Browse files

I updated some scripts and added input files

parent 35c9bcca
...@@ -31,6 +31,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq ...@@ -31,6 +31,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
#### Figure 1B #### #### Figure 1B ####
df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx"), sheet=3, col_names = T, skip=1)) # sheet: umis.genes.polyA-mouse df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx"), sheet=3, col_names = T, skip=1)) # sheet: umis.genes.polyA-mouse
#df1 <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.csv")))
df1 <-df1[,-c(1,3:6)] # keep only gene name and UMI counts df1 <-df1[,-c(1,3:6)] # keep only gene name and UMI counts
df1[,-1] <-apply(df1[,-1],2,function(x){x/sum(x)*1000000}) # UMI normalized by CPM df1[,-1] <-apply(df1[,-1],2,function(x){x/sum(x)*1000000}) # UMI normalized by CPM
gene_vector <- c("Cd19", "Ms4a1", "Fcer2a", "Ighm", "Cd8a", "Xcr1", "Itgae", "Itgax", gene_vector <- c("Cd19", "Ms4a1", "Fcer2a", "Ighm", "Cd8a", "Xcr1", "Itgae", "Itgax",
...@@ -41,6 +42,7 @@ df2 <- df1[df1$gene%in%gene_vector,] %>% arrange(factor(gene, levels=gene_vector ...@@ -41,6 +42,7 @@ df2 <- df1[df1$gene%in%gene_vector,] %>% arrange(factor(gene, levels=gene_vector
rownames(df2) <- df2[,1] rownames(df2) <- df2[,1]
df2.scaled <- as.data.frame(t(scale(t(df2[-1])))) # Zscore normalization. Scaled only works on columns, so need to transform df2.scaled <- as.data.frame(t(scale(t(df2[-1])))) # Zscore normalization. Scaled only works on columns, so need to transform
openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1B_table.xlsx"), rowNames=T) openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1B_table.xlsx"), rowNames=T)
#write.csv(df2.scaled, paste0(output_dir, "/Figure_1B_table.csv"), row.names=T)
# the df was exported and used to create a heatmap using Graphpad # the df was exported and used to create a heatmap using Graphpad
...@@ -53,6 +55,8 @@ for (contr in getSheetNames(ff)) { ...@@ -53,6 +55,8 @@ for (contr in getSheetNames(ff)) {
dge_res[[contr]] <- read.xlsx(ff, rowNames = T, sheet = contr) dge_res[[contr]] <- read.xlsx(ff, rowNames = T, sheet = contr)
} }
names(dge_res)
sample_order <- c("RPM", "cDC1", "cDC2", "B220+", "LSEC", "KC") sample_order <- c("RPM", "cDC1", "cDC2", "B220+", "LSEC", "KC")
# Plot DGE volcano # Plot DGE volcano
...@@ -122,6 +126,7 @@ ggsave(filename = paste(output_dir, "edgeR_DGE_res_volcano_Fig.3C.sx.pdf", sep = ...@@ -122,6 +126,7 @@ ggsave(filename = paste(output_dir, "edgeR_DGE_res_volcano_Fig.3C.sx.pdf", sep =
#### Figure 1C.dx #### #### Figure 1C.dx ####
df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
#df1 <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.csv")))
df1 <- df1[,1:7] # UMI df1 <- df1[,1:7] # UMI
rownames(df1)<-df1$miRNA rownames(df1)<-df1$miRNA
df2 <- apply(df1[,-1], 2, function(x) log(x)) # Log counts df2 <- apply(df1[,-1], 2, function(x) log(x)) # Log counts
...@@ -141,13 +146,15 @@ dev.off() ...@@ -141,13 +146,15 @@ dev.off()
#### Figure 1D #### #### Figure 1D ####
# Figure 1D input: load the miRNA/piRNA summary, keep miRNA rows only,
# normalize counts to CPM and sum them per miRNA family.
df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet = 2, col_names = TRUE)) # sheet: miRNA_piRNA
#df1 <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.csv")))
# Remove piRNA rows and keep columns 1:7 (UMI). A logical mask is used instead of
# -which(...): with no "piR" match, which() returns integer(0) and the negative
# index would silently drop every row.
df1 <- df1[!grepl("piR", df1[, 1]), 1:7] # UMI and piRNA removing
df1[, 1] <- gsub("/.*", "", df1[, 1]) # leave only first miRNA for ambiguous entries
df1[, -1] <- apply(df1[, -1], 2, function(x) {x / sum(x, na.rm = TRUE) * 1000000}) # UMI normalized by CPM
df.families <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_Family.xlsx"), sheet = 1, col_names = TRUE)) # miRNA families from miRBase
#df.families <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_Family.csv")))
# %in% never returns NA, so rows with a missing species ID are dropped rather
# than turned into all-NA rows as `==` would do.
df.families <- df.families[df.families[, 3] %in% 10090, c(4, 1)] # select mouse entries
# Truncate each name after its first "p" (e.g. "-5p"/"-3p" suffixes) so the names match df1's first column.
df.families[, 1] <- sub(pattern = "p.*", replacement = "p", x = df.families[, 1])
# Join families to counts on the first column of each table (the miRNA name).
df2 <- merge(df.families, df1, by = 1)
df2 <- aggregate(df2[, -c(1:2)], by = df2["miR family"], FUN = sum, na.rm = TRUE) # sum counts by family
gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.1", gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.1",
"miR-17-5p/20-5p/93-5p/106-5p","miR-191-5p", "miR-17-5p/20-5p/93-5p/106-5p","miR-191-5p",
...@@ -158,4 +165,5 @@ df3 <- subset(df2, df2$`miR family`%in% gene_vector) %>% arrange(factor(`miR fam ...@@ -158,4 +165,5 @@ df3 <- subset(df2, df2$`miR family`%in% gene_vector) %>% arrange(factor(`miR fam
# Figure 1D export: z-score the selected miRNA families and write the table.
rownames(df3) <- df3[, 1]
# scale() standardizes columns, so transpose to z-score each row, then transpose back.
df3.scaled <- as.data.frame(t(scale(t(df3[-1])))) # Zscore normalization. Scaled only works on columns, so need to transform
# Write df3.scaled (the Figure 1D matrix computed above). The previous code wrote
# df2.scaled, i.e. the Figure 1B table, into the Figure 1D output file.
openxlsx::write.xlsx(df3.scaled, paste0(output_dir, "/Figure_1D_table.xlsx"), rowNames = TRUE)
#write.csv(df3.scaled, paste0(output_dir, "/Figure_1D_table.csv"), row.names = TRUE)
# The df was exported and used to create a heatmap using Graphpad
...@@ -41,6 +41,9 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq ...@@ -41,6 +41,9 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
# Import df # Import df
miR_ctrl <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "miR342-control") miR_ctrl <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "miR342-control")
sponge_ctrl <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "spongeBT-control") sponge_ctrl <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "spongeBT-control")
#miR_ctrl <- as.data.frame(read.csv(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results_miR342-control.csv")))
#sponge_ctrl <- as.data.frame(read.csv(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results_spongeBT-control.csv")))
# Add DEG color and label for volcano plot # Add DEG color and label for volcano plot
miR_ctrl$DEG <- "NO" miR_ctrl$DEG <- "NO"
...@@ -86,6 +89,7 @@ ggsave(filename = paste0(output_dir, "/Volcano_plot_3B.pdf"), plot=p2, width=7, ...@@ -86,6 +89,7 @@ ggsave(filename = paste0(output_dir, "/Volcano_plot_3B.pdf"), plot=p2, width=7,
# Import miR-342-3p target list (from TargetScan) # Import miR-342-3p target list (from TargetScan)
miR342_targets <- read_excel(paste0(input_dir, "/TargetScan8.0_miR-342-3p.predicted_targets.xlsx")) miR342_targets <- read_excel(paste0(input_dir, "/TargetScan8.0_miR-342-3p.predicted_targets.xlsx"))
#miR342_targets <- as.data.frame(read.csv(paste0(input_dir, "/TargetScan8.0_miR-342-3p.predicted_targets.csv")))
miR342_targets <- filter(miR342_targets, miR342_targets$`Cumulative weighted context++ score`< (-0.3)) miR342_targets <- filter(miR342_targets, miR342_targets$`Cumulative weighted context++ score`< (-0.3))
# ecdf plot (right panel) # ecdf plot (right panel)
......
...@@ -101,6 +101,7 @@ var_list <- c("UMAPh_1", "UMAPh_2","PROP.Group","orig.ident", "Sample","RNA_snn_ ...@@ -101,6 +101,7 @@ var_list <- c("UMAPh_1", "UMAPh_2","PROP.Group","orig.ident", "Sample","RNA_snn_
obs1_df <- FetchData(obs1, vars = var_list) obs1_df <- FetchData(obs1, vars = var_list)
obs1_df$cell_ID <- rownames(obs1_df) obs1_df$cell_ID <- rownames(obs1_df)
write.xlsx(obs1_df, file=paste0(output_dir, "/CB_Annotation.final.CB_Fig.5A.xlsx"), overwrite=T) write.xlsx(obs1_df, file=paste0(output_dir, "/CB_Annotation.final.CB_Fig.5A.xlsx"), overwrite=T)
write.csv(obs1_df, paste0(output_dir, "/CB_Annotation.final.CB_Fig.5A.csv"))
# Dotplot with markers for population (Suppl.Fig.5A) # Dotplot with markers for population (Suppl.Fig.5A)
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -2,40 +2,42 @@ ...@@ -2,40 +2,42 @@
**Reprogramming liver metastasis-associated macrophages towards an anti-tumoral phenotype through enforced miR-342 expression** **Reprogramming liver metastasis-associated macrophages towards an anti-tumoral phenotype through enforced miR-342 expression**
Bresesti C, Monti M, Beretta S, Merelli I, Squadrito M, _et al._; 2025 [Chiara Bresesti](https://orcid.org/0000-0002-1840-9774), [Marco Monti](https://orcid.org/0000-0003-1266-4325), [Stefano Beretta](https://orcid.org/0000-0003-4375-004X), [Ivan Merelli](https://orcid.org/0000-0003-3587-3680), [Mario Leonardo Squadrito](https://orcid.org/0000-0002-1188-0299), *et al.*; Cell Reports, 2025 <https://doi.org/10.1016/j.celrep.2025.115592>
Corresponding Author: Mario Leonardo Squadrito. Email: squadrito.mario@hsr.it. [![Twitter URL](https://img.shields.io/twitter/url/https/twitter.com/wouter_decoster.svg?style=social&label=Follow%20%40Mariosqua)](https://x.com/Mariosqua) Corresponding Author: Mario Leonardo Squadrito. Email: [squadrito.mario\@hsr.it](mailto:squadrito.mario@hsr.it){.email}. [![Twitter URL](https://img.shields.io/twitter/url/https/twitter.com/wouter_decoster.svg?style=social&label=Follow%20%40Mariosqua)](https://x.com/Mariosqua)
GEO: Raw data are on GEO:
[GSE274043](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274043) (single-cell RNA-seq) [GSE274043](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274043) (single-cell RNA-seq)\
[GSE274044](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274044) (RNA-seq on iKCs) [GSE274044](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274044) (RNA-seq on iKCs)\
[GSE274045](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274045) (small RNA-seq on splenic and hepatic cell populations) [GSE274045](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274045) (small RNA-seq on splenic and hepatic cell populations)\
[GSE274046](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274046) (bulk RNA-seq on splenic and hepatic cell populations) [GSE274046](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274046) (bulk RNA-seq on splenic and hepatic cell populations)
Other input data are available on the Open Research Data Repository (ORDR):
DOI: [10.17632/4gpbv5vpcr.1](https://doi.org/10.17632/4gpbv5vpcr.1)
## Directories and Files ## Directories and Files
- environment_singlecell5.yml: contains the conda virtual environment that can be used to install all the dependencies. - environment_singlecell5.yml: contains the conda virtual environment that can be used to install all the dependencies.
- scripts: folder with R scripts used for the analyses - scripts: folder with R scripts used for the analyses
- CB2025_figure_1_RNAseq.R - CB2025_figure_1_RNAseq.R
- CB2025_figure_3_RNAseq.R - CB2025_figure_3_RNAseq.R
- CB2025_figure_5_scRNAseq.R - CB2025_figure_5_scRNAseq.R
- TCGA_analysis.R - TCGA_analysis.R
- Output: results of the analyses - Output: results of the analyses
- Input: input files required to generate the figures - CB_Annotation.final.CB_Fig.5A.csv: scRNAseq source data to reproduce the UMAP in Figure 5A
- miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx: UMI and gene count data from RNA-seq (Figure 1B) - Input: input files required to generate the figures
- miRNA_QIAseq_1509_181342_edgeR_results.xlsx: Summary data for miRNA and piRNA (Figure 1C.dx & 1D) - miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx: UMI and gene count data from RNA-seq (Figure 1B)
- miRNA_QIAseq_1510_173308.all_samples.summary.xlsx: Differential expression analysis results (Figure 1C.sx) - miRNA_QIAseq_1509_181342_edgeR_results.xlsx: Differential expression analysis results (Figure 1C.sx)
- miRNA_Family.xlsx: miRNA family information (Figure 1D) - miRNA_QIAseq_1510_173308.all_samples.summary.xlsx: Summary data for miRNA and piRNA (Figure 1C.dx & 1D)
- miDB_sig5.MLS.rds: reference files for GSEA analysis (Figure 3 & 5) - miRNA_Family.xlsx: miRNA family information (Figure 1D)
- RNAseq_90-857433247_edgeR_results.xlsx: Differential gene expression analysis for miR-342 vs. control and spongeBT vs. control (Figure 3A & 3B & 3C & 3D) - miDB_sig5.MLS.rds: reference files for GSEA analysis (Figure 3 & 5)
- TargetScan8.0_miR-342-3p.predicted_targets.xlsx: Predicted target genes for miR-342-3p from TargetScan (Figure 3A & 3B) - RNAseq_90-857433247_edgeR_results.xlsx: Differential gene expression analysis for miR-342 vs. control and spongeBT vs. control (Figure 3A & 3B & 3C & 3D)
- CB1_CB3_CB4_final.rds: Seurat object containing scRNA-seq data (Figure 5 & S5) - TargetScan8.0_miR-342-3p.predicted_targets.xlsx: Predicted target genes for miR-342-3p from TargetScan (Figure 3A & 3B)
- TCGA_phenotype.tsv.gz: TCGA patient phenotype data (metadata) with clinical and demographic information (TCGA Analysis) - CB1_CB3_CB4_final.rds: Seurat object containing scRNA-seq data (Figure 5 & S5) (present on ORDR)
- Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, providing overall survival (OS) status and time (TCGA Analysis) - TCGA_phenotype.tsv.gz: TCGA patient phenotype data (metadata) with clinical and demographic information (TCGA Analysis)
- pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz: TCGA pancancer miRNA expression data (FPKM values) across different tumor samples (TCGA Analysis) - Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, providing overall survival (OS) status and time (TCGA Analysis)
- pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz: TCGA pancancer miRNA expression data (FPKM values) across different tumor samples (TCGA Analysis) (present on ORDR)
## scRNAseq analysis ## scRNAseq analysis
The initial preprocessing of the data, including mapping against the _Mus musculus_ GRCm38 reference genome and gene counting, was done using the 10x Genomics Cell Ranger Software (v7.2.0) using default parameters. The resulting data were imported into R and analyzed with the Seurat package (v5.0.1).
The initial preprocessing of the data, including mapping against the *Mus musculus* GRCm38 reference genome and gene counting, was done using the 10x Genomics Cell Ranger Software (v7.2.0) using default parameters. The resulting data were imported into R and analyzed with the Seurat package (v5.0.1).
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment