I updated some scripts and added input files

1ca1041f · Marco Monti · 35c9bcca · 1ca1041f · 1ca1041f · 1ca1041f
Commit 1ca1041f authored May 08, 2025 by Marco Monti
--- a/CB2025_figure_1_RNAseq.R
+++ b/CB2025_figure_1_RNAseq.R
@@ -31,6 +31,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq

 #### Figure 1B ####
 df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx"), sheet=3, col_names = T, skip=1)) # sheet: umis.genes.polyA-mouse
+#df1 <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.csv")))
 df1 <-df1[,-c(1,3:6)] # keep only gene name and UMI counts
 df1[,-1] <-apply(df1[,-1],2,function(x){x/sum(x)*1000000}) # UMI normalized by CPM
 gene_vector <- c("Cd19", "Ms4a1", "Fcer2a", "Ighm", "Cd8a", "Xcr1", "Itgae", "Itgax", 
@@ -41,6 +42,7 @@ df2 <- df1[df1$gene%in%gene_vector,] %>% arrange(factor(gene, levels=gene_vector
 rownames(df2) <- df2[,1]
 df2.scaled <- as.data.frame(t(scale(t(df2[-1])))) # Zscore normalization. Scaled only works on columns, so need to transform
 openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1B_table.xlsx"), rowNames=T)
+#write.csv(df2.scaled, paste0(output_dir, "/Figure_1B_table.csv"), row.names=T)
 # the df was exported and used to create a heatmap using Graphpad


@@ -53,6 +55,8 @@ for (contr in getSheetNames(ff)) {
  dge_res[[contr]] <- read.xlsx(ff, rowNames = T, sheet = contr)
 }

+names(dge_res)
+
 sample_order <- c("RPM", "cDC1", "cDC2", "B220+", "LSEC", "KC")

 # Plot DGE volcano
@@ -122,6 +126,7 @@ ggsave(filename = paste(output_dir, "edgeR_DGE_res_volcano_Fig.3C.sx.pdf", sep =

 #### Figure 1C.dx ####
 df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
+#df1 <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.csv")))
 df1 <- df1[,1:7] # UMI 
 rownames(df1)<-df1$miRNA
 df2 <- apply(df1[,-1], 2, function(x) log(x)) # Log counts
@@ -141,13 +146,15 @@ dev.off()

 #### Figure 1D ####
 df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet=2, col_names=T)) # sheet: miRNA_piRNA
+#df1 <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.csv")))
 df1 <- df1[!grepl("piR", df1[,1]), 1:7] # keep name + UMI columns and drop piRNA rows; a logical mask (unlike -which()) keeps every row when nothing matches "piR"
 df1[,1] <- gsub("/.*","",df1[,1]) # leave only first miRNA for ambiguous entries
 df1[,-1] <- apply(df1[,-1],2,function(x){x/sum(x, na.rm=T)*1000000}) # UMI normalized by CPM
 df.families <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_Family.xlsx"), sheet=1 ,col_names=T)) # miRNA families from miRBase
+#df.families <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_Family.csv")))
 df.families <- df.families[df.families[,3]==10090,c(4,1)] # select mouse entries
 df.families[,1] <- sub(pattern = "p.*", replacement="p", x = df.families[,1])
-df2 <- merge(df.families,df1,by=1)
+df2 <- merge(df.families, df1, by=1)
 df2 <- aggregate(df2[,-c(1:2)], by = df2["miR family"], FUN=sum, na.rm=T) # sum counts by family
 gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.1",
                 "miR-17-5p/20-5p/93-5p/106-5p","miR-191-5p",
@@ -158,4 +165,5 @@ df3 <- subset(df2, df2$`miR family`%in% gene_vector) %>% arrange(factor(`miR fam
 rownames(df3) <- df3[,1]
 df3.scaled <- as.data.frame(t(scale(t(df3[-1])))) # Zscore normalization per miRNA family. scale() works on columns, so transpose before and after
 openxlsx::write.xlsx(df3.scaled, paste0(output_dir, "/Figure_1D_table.xlsx"), rowNames=T) # export the Figure 1D table (df3.scaled); df2.scaled is the Figure 1B table
+#write.csv(df3.scaled, paste0(output_dir, "/Figure_1D_table.csv"), row.names=T)
 # The df was exported and used to create a heatmap using Graphpad
--- a/CB2025_figure_3_RNAseq.R
+++ b/CB2025_figure_3_RNAseq.R
@@ -41,6 +41,9 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
 # Import df
 miR_ctrl <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "miR342-control")
 sponge_ctrl <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "spongeBT-control")
+#miR_ctrl <- as.data.frame(read.csv(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results_miR342-control.csv")))
+#sponge_ctrl <- as.data.frame(read.csv(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results_spongeBT-control.csv")))
+

 # Add DEG color and label for volcano plot
 miR_ctrl$DEG <- "NO"
@@ -86,6 +89,7 @@ ggsave(filename = paste0(output_dir, "/Volcano_plot_3B.pdf"), plot=p2, width=7,

 # Import miR-342-3p target list (from TargetScan)
 miR342_targets <-  read_excel(paste0(input_dir, "/TargetScan8.0_miR-342-3p.predicted_targets.xlsx"))
+#miR342_targets <- as.data.frame(read.csv(paste0(input_dir, "/TargetScan8.0_miR-342-3p.predicted_targets.csv")))
 miR342_targets <-  filter(miR342_targets, miR342_targets$`Cumulative weighted context++ score`< (-0.3))

 # ecdf plot (right panel)

--- a/CB2025_figure_5_scRNAseq.R
+++ b/CB2025_figure_5_scRNAseq.R
@@ -101,6 +101,7 @@ var_list <- c("UMAPh_1", "UMAPh_2","PROP.Group","orig.ident", "Sample","RNA_snn_
 obs1_df <- FetchData(obs1, vars = var_list)
 obs1_df$cell_ID <- rownames(obs1_df)
 write.xlsx(obs1_df, file=paste0(output_dir, "/CB_Annotation.final.CB_Fig.5A.xlsx"), overwrite=T)
+write.csv(obs1_df, paste0(output_dir, "/CB_Annotation.final.CB_Fig.5A.csv"))


 # Dotplot with markers for population (Suppl.Fig.5A)

--- a/Input/RNAseq_90-857433247_edgeR_results.xlsx
+++ b/Input/RNAseq_90-857433247_edgeR_results.xlsx
--- a/Input/Survival_SupplementalTable_S1_20171025_xena_sp
+++ b/Input/Survival_SupplementalTable_S1_20171025_xena_sp
--- a/Input/TCGA_phenotype.tsv.gz
+++ b/Input/TCGA_phenotype.tsv.gz
--- a/Input/TargetScan8.0_miR-342-3p.predicted_targets.csv
+++ b/Input/TargetScan8.0_miR-342-3p.predicted_targets.csv
--- a/Input/TargetScan8.0_miR-342-3p.predicted_targets.xlsx
+++ b/Input/TargetScan8.0_miR-342-3p.predicted_targets.xlsx
--- a/Input/miDB_sig5.MLS.rds
+++ b/Input/miDB_sig5.MLS.rds
--- a/Input/miRNA_Family.csv
+++ b/Input/miRNA_Family.csv
--- a/Input/miRNA_Family.xlsx
+++ b/Input/miRNA_Family.xlsx
--- a/Input/miRNA_QIAseq_1509_181342_edgeR_results.xlsx
+++ b/Input/miRNA_QIAseq_1509_181342_edgeR_results.xlsx
--- a/Input/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx
+++ b/Input/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx
--- a/Input/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx
+++ b/Input/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx
--- a/Output/CB_Annotation.final.CB_Fig.5A.csv
+++ b/Output/CB_Annotation.final.CB_Fig.5A.csv
--- a/README.md
+++ b/README.md
@@ -2,40 +2,42 @@

 **Reprogramming liver metastasis-associated macrophages towards an anti-tumoral phenotype through enforced miR-342 expression**

-Bresesti C, Monti M, Beretta S, Merelli I, Squadrito M, _et al._; 2025
+[Chiara Bresesti](https://orcid.org/0000-0002-1840-9774), [Marco Monti](https://orcid.org/0000-0003-1266-4325), [Stefano Beretta](https://orcid.org/0000-0003-4375-004X), [Ivan Merelli](https://orcid.org/0000-0003-3587-3680), [Mario Leonardo Squadrito](https://orcid.org/0000-0002-1188-0299), *et al.*; Cell Reports, 2025 <https://doi.org/10.1016/j.celrep.2025.115592>

-Corresponding Author: Mario Leonardo Squadrito. Email: squadrito.mario@hsr.it.  [![Twitter URL](https://img.shields.io/twitter/url/https/twitter.com/wouter_decoster.svg?style=social&label=Follow%20%40Mariosqua)](https://x.com/Mariosqua)
+Corresponding Author: Mario Leonardo Squadrito. Email: [squadrito.mario@hsr.it](mailto:squadrito.mario@hsr.it). [![Twitter URL](https://img.shields.io/twitter/url/https/twitter.com/wouter_decoster.svg?style=social&label=Follow%20%40Mariosqua)](https://x.com/Mariosqua)

-GEO:  
-[GSE274043](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274043) (single-cell RNA-seq)  
-[GSE274044](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274044) (RNA-seq on iKCs)  
-[GSE274045](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274045) (small RNA-seq on splenic and hepatic cell populations)  
-[GSE274046](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274046) (bulk RNA-seq on splenic and hepatic cell populations)  
+Raw data are on GEO:  
+[GSE274043](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274043) (single-cell RNA-seq)\
+[GSE274044](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274044) (RNA-seq on iKCs)\
+[GSE274045](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274045) (small RNA-seq on splenic and hepatic cell populations)\
+[GSE274046](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274046) (bulk RNA-seq on splenic and hepatic cell populations)

+Other input data are on the Open Research Data Repository (ORDR):  
+DOI: [10.17632/4gpbv5vpcr.1](https://doi.org/10.17632/4gpbv5vpcr.1)

 ## Directories and Files

- environment_singlecell5.yml: contains the conda virtual environment that can be used to install all the dependencies.
- scripts: folder with R scripts used for the analyses
-    - CB2025_figure_1_RNAseq.R
-    - CB2025_figure_3_RNAseq.R
-    - CB2025_figure_5_scRNAseq.R
-    - TCGA_analysis.R
- Output: results of the analyses
- Input: input files required to generate the figures
-    - miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx: UMI and gene count data from RNA-seq (Figure 1B)
-    - miRNA_QIAseq_1509_181342_edgeR_results.xlsx: Summary data for miRNA and piRNA (Figure 1C.dx & 1D)
-    - miRNA_QIAseq_1510_173308.all_samples.summary.xlsx: Differential expression analysis results (Figure 1C.sx)
-    - miRNA_Family.xlsx: miRNA family information (Figure 1D)
-    - miDB_sig5.MLS.rds: reference files for GSEA analysis (Figure 3 & 5)
-    - RNAseq_90-857433247_edgeR_results.xlsx: Differential gene expression analysis for miR-342 vs. control and spongeBT vs. control (Figure 3A & 3B & 3C & 3D)
-    - TargetScan8.0_miR-342-3p.predicted_targets.xlsx: Predicted target genes for miR-342-3p from TargetScan (Figure 3A & 3B)
-    - CB1_CB3_CB4_final.rds: Seurat object containing scRNA-seq data (Figure 5 & S5)
-    - TCGA_phenotype.tsv.gz: TCGA patient phenotype data (metadata) with clinical and demographic information (TCGA Analysis)
-    - Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, providing overall survival (OS) status and time (TCGA Analysis)
-    - pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz: TCGA pancancer miRNA expression data (FPKM values) across different tumor samples (TCGA Analysis)
-
+-   environment_singlecell5.yml: contains the conda virtual environment that can be used to install all the dependencies.
+-   scripts: folder with R scripts used for the analyses
+    -   CB2025_figure_1_RNAseq.R
+    -   CB2025_figure_3_RNAseq.R
+    -   CB2025_figure_5_scRNAseq.R
+    -   TCGA_analysis.R
+-   Output: results of the analyses
+    -   CB_Annotation.final.CB_Fig.5A.csv: scRNA-seq source data to reproduce the UMAP in Figure 5A
+-   Input: input files required to generate the figures
+    -   miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx: UMI and gene count data from RNA-seq (Figure 1B)
+    -   miRNA_QIAseq_1509_181342_edgeR_results.xlsx: Differential expression analysis results (Figure 1C.sx)
+    -   miRNA_QIAseq_1510_173308.all_samples.summary.xlsx: Summary data for miRNA and piRNA (Figure 1C.dx & 1D)
+    -   miRNA_Family.xlsx: miRNA family information (Figure 1D)
+    -   miDB_sig5.MLS.rds: reference files for GSEA analysis (Figure 3 & 5)
+    -   RNAseq_90-857433247_edgeR_results.xlsx: Differential gene expression analysis for miR-342 vs. control and spongeBT vs. control (Figure 3A & 3B & 3C & 3D)
+    -   TargetScan8.0_miR-342-3p.predicted_targets.xlsx: Predicted target genes for miR-342-3p from TargetScan (Figure 3A & 3B)
+    -   CB1_CB3_CB4_final.rds: Seurat object containing scRNA-seq data (Figure 5 & S5) (present on ORDR)
+    -   TCGA_phenotype.tsv.gz: TCGA patient phenotype data (metadata) with clinical and demographic information (TCGA Analysis)
+    -   Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, providing overall survival (OS) status and time (TCGA Analysis)
+    -   pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz: TCGA pancancer miRNA expression data (FPKM values) across different tumor samples (TCGA Analysis) (present on ORDR)

 ## scRNAseq analysis
-The initial preprocessing of the data, including mapping against the _Mus musculus_ GRCm38 reference genome and gene counting, was done using the 10x Genomics Cell Ranger Software (v7.2.0) using default parameters. The resulting data were imported into R and analyzed with the Seurat package (v5.0.1).

+The initial preprocessing of the data, including mapping against the *Mus musculus* GRCm38 reference genome and gene counting, was done using the 10x Genomics Cell Ranger Software (v7.2.0) using default parameters. The resulting data were imported into R and analyzed with the Seurat package (v5.0.1).