Commit 1ca1041f authored by Marco Monti's avatar Marco Monti
Browse files

I updated some scripts and added input files

parent 35c9bcca
......@@ -31,6 +31,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
#### Figure 1B ####
# Load the UMI count table from the third sheet, skipping the first spreadsheet row.
df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx"), sheet = 3, col_names = TRUE, skip = 1)) # sheet: umis.genes.polyA-mouse
#df1 <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.csv")))
df1 <- df1[, -c(1, 3:6)] # keep only gene name and UMI counts
# UMI normalized by CPM: divide every count column by its total and multiply by one million.
# lapply() works column by column, so the data frame is never coerced to a matrix.
df1[-1] <- lapply(df1[-1], function(x) x / sum(x) * 1000000)
gene_vector <- c("Cd19", "Ms4a1", "Fcer2a", "Ighm", "Cd8a", "Xcr1", "Itgae", "Itgax",
......@@ -41,6 +42,7 @@ df2 <- df1[df1$gene%in%gene_vector,] %>% arrange(factor(gene, levels=gene_vector
# Use the first column of df2 as row names so they survive the transposes below.
rownames(df2) <- df2[, 1]
# Z-score normalization per row: scale() standardizes columns, so transpose before and after.
df2.scaled <- as.data.frame(t(scale(t(df2[-1]))))
openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1B_table.xlsx"), rowNames = TRUE)
#write.csv(df2.scaled, paste0(output_dir, "/Figure_1B_table.csv"), row.names=T)
# the df was exported and used to create a heatmap using Graphpad
......@@ -53,6 +55,8 @@ for (contr in getSheetNames(ff)) {
dge_res[[contr]] <- read.xlsx(ff, rowNames = T, sheet = contr)
}
# Show the names of the entries stored in dge_res (one per sheet read above).
names(dge_res)
# Sample labels in a fixed order. NOTE(review): presumably used to order samples in the plots below — confirm.
sample_order <- c("RPM", "cDC1", "cDC2", "B220+", "LSEC", "KC")
# Plot DGE volcano
......@@ -122,6 +126,7 @@ ggsave(filename = paste(output_dir, "edgeR_DGE_res_volcano_Fig.3C.sx.pdf", sep =
#### Figure 1C.dx ####
# Load the second sheet of the all-samples summary workbook.
df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet = 2, col_names = TRUE)) # sheet: miRNA_piRNA
#df1 <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.csv")))
df1 <- df1[, 1:7] # UMI
rownames(df1) <- df1$miRNA
# Natural log of every count column; the result is a matrix. Zero counts become -Inf.
df2 <- apply(df1[, -1], 2, log) # Log counts
......@@ -141,13 +146,15 @@ dev.off()
#### Figure 1D ####
# Load the second sheet of the all-samples summary workbook.
df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet = 2, col_names = TRUE)) # sheet: miRNA_piRNA
#df1 <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.csv")))
# UMI and piRNA removing. A logical mask is used instead of `-which(...)`:
# with no "piR" match, `-which(...)` is an empty index and would drop every row.
df1 <- df1[!grepl("piR", df1[, 1]), 1:7]
df1[, 1] <- gsub("/.*", "", df1[, 1]) # leave only first miRNA for ambiguous entries
# UMI normalized by CPM, ignoring missing values in the column totals.
df1[-1] <- lapply(df1[-1], function(x) x / sum(x, na.rm = TRUE) * 1000000)
df.families <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_Family.xlsx"), sheet = 1, col_names = TRUE)) # miRNA families from miRBase
#df.families <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_Family.csv")))
df.families <- df.families[df.families[, 3] == 10090, c(4, 1)] # select mouse entries
# Truncate each name after its first "p" so it can be matched against the names in df1.
df.families[, 1] <- sub(pattern = "p.*", replacement = "p", x = df.families[, 1])
# Join family annotation and counts on their first columns (the merge was previously run twice).
df2 <- merge(df.families, df1, by = 1)
df2 <- aggregate(df2[, -c(1:2)], by = df2["miR family"], FUN = sum, na.rm = TRUE) # sum counts by family
gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.1",
"miR-17-5p/20-5p/93-5p/106-5p","miR-191-5p",
......@@ -158,4 +165,5 @@ df3 <- subset(df2, df2$`miR family`%in% gene_vector) %>% arrange(factor(`miR fam
# Use the first column of df3 as row names so they survive the transposes below.
rownames(df3) <- df3[, 1]
# Z-score normalization per row: scale() standardizes columns, so transpose before and after.
df3.scaled <- as.data.frame(t(scale(t(df3[-1]))))
# Export the Figure 1D table (df3.scaled). The previous code wrote df2.scaled here,
# which is the Figure 1B table computed earlier in the script.
openxlsx::write.xlsx(df3.scaled, paste0(output_dir, "/Figure_1D_table.xlsx"), rowNames = TRUE)
#write.csv(df3.scaled, paste0(output_dir, "/Figure_1D_table.csv"), row.names=T)
# The df was exported and used to create a heatmap using Graphpad
......@@ -41,6 +41,9 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
# Import df
# Both tables come from the same edgeR results workbook, one sheet per contrast.
miR_ctrl <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "miR342-control")
sponge_ctrl <- read_excel(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"), sheet = "spongeBT-control")
# Alternative import from per-contrast CSV files (disabled).
#miR_ctrl <- as.data.frame(read.csv(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results_miR342-control.csv")))
#sponge_ctrl <- as.data.frame(read.csv(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results_spongeBT-control.csv")))
# Add DEG color and label for volcano plot
# Every row starts as "NO". NOTE(review): presumably relabelled by code below this excerpt — confirm.
miR_ctrl$DEG <- "NO"
......@@ -86,6 +89,7 @@ ggsave(filename = paste0(output_dir, "/Volcano_plot_3B.pdf"), plot=p2, width=7,
# Import miR-342-3p target list (from TargetScan)
miR342_targets <- read_excel(paste0(input_dir, "/TargetScan8.0_miR-342-3p.predicted_targets.xlsx"))
#miR342_targets <- as.data.frame(read.csv(paste0(input_dir, "/TargetScan8.0_miR-342-3p.predicted_targets.csv")))
# Keep only targets whose cumulative weighted context++ score is below -0.3.
# The column is referenced by name inside filter() rather than through `miR342_targets$`.
miR342_targets <- filter(miR342_targets, `Cumulative weighted context++ score` < -0.3)
# ecdf plot (right panel)
......
......@@ -101,6 +101,7 @@ var_list <- c("UMAPh_1", "UMAPh_2","PROP.Group","orig.ident", "Sample","RNA_snn_
# Extract the variables listed in var_list from obs1 into a data frame.
obs1_df <- FetchData(obs1, vars = var_list)
# Copy the row names into a regular column so they are kept in the exported files.
obs1_df$cell_ID <- rownames(obs1_df)
# Export the same table as both xlsx (replacing any existing file) and csv.
write.xlsx(obs1_df, file = paste0(output_dir, "/CB_Annotation.final.CB_Fig.5A.xlsx"), overwrite = TRUE)
write.csv(obs1_df, paste0(output_dir, "/CB_Annotation.final.CB_Fig.5A.csv"))
# Dotplot with markers for population (Suppl.Fig.5A)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -2,40 +2,42 @@
**Reprogramming liver metastasis-associated macrophages towards an anti-tumoral phenotype through enforced miR-342 expression**
Bresesti C, Monti M, Beretta S, Merelli I, Squadrito M, _et al._; 2025
[Chiara Bresesti](https://orcid.org/0000-0002-1840-9774), [Marco Monti](https://orcid.org/0000-0003-1266-4325), [Stefano Beretta](https://orcid.org/0000-0003-4375-004X), [Ivan Merelli](https://orcid.org/0000-0003-3587-3680), [Mario Leonardo Squadrito](https://orcid.org/0000-0002-1188-0299), *et al.*; Cell Reports, 2025 <https://doi.org/10.1016/j.celrep.2025.115592>
Corresponding Author: Mario Leonardo Squadrito. Email: squadrito.mario@hsr.it. [![Twitter URL](https://img.shields.io/twitter/url/https/twitter.com/wouter_decoster.svg?style=social&label=Follow%20%40Mariosqua)](https://x.com/Mariosqua)
Corresponding Author: Mario Leonardo Squadrito. Email: [squadrito.mario\@hsr.it](mailto:squadrito.mario@hsr.it){.email}. [![Twitter URL](https://img.shields.io/twitter/url/https/twitter.com/wouter_decoster.svg?style=social&label=Follow%20%40Mariosqua)](https://x.com/Mariosqua)
GEO:
[GSE274043](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274043) (single-cell RNA-seq)
[GSE274044](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274044) (RNA-seq on iKCs)
[GSE274045](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274045) (small RNA-seq on splenic and hepatic cell populations)
[GSE274046](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274046) (bulk RNA-seq on splenic and hepatic cell populations)
Raw data are on GEO:
[GSE274043](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274043) (single-cell RNA-seq)\
[GSE274044](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274044) (RNA-seq on iKCs)\
[GSE274045](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274045) (small RNA-seq on splenic and hepatic cell populations)\
[GSE274046](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274046) (bulk RNA-seq on splenic and hepatic cell populations)
Other input data are available on the Open Research Data Repository (ORDR):
DOI: 10.17632/4gpbv5vpcr.1
## Directories and Files
- environment_singlecell5.yml: contains the conda virtual environment that can be used to install all the dependencies.
- scripts: folder with R scripts used for the analyses
- CB2025_figure_1_RNAseq.R
- CB2025_figure_3_RNAseq.R
- CB2025_figure_5_scRNAseq.R
- TCGA_analysis.R
- Output: results of the analyses
- Input: input files required to generate the figures
- miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx: UMI and gene count data from RNA-seq (Figure 1B)
- miRNA_QIAseq_1509_181342_edgeR_results.xlsx: Differential expression analysis results (Figure 1C.sx)
- miRNA_QIAseq_1510_173308.all_samples.summary.xlsx: Summary data for miRNA and piRNA (Figure 1C.dx & 1D)
- miRNA_Family.xlsx: miRNA family information (Figure 1D)
- miDB_sig5.MLS.rds: reference files for GSEA analysis (Figure 3 & 5)
- RNAseq_90-857433247_edgeR_results.xlsx: Differential gene expression analysis for miR-342 vs. control and spongeBT vs. control (Figure 3A & 3B & 3C & 3D)
- TargetScan8.0_miR-342-3p.predicted_targets.xlsx: Predicted target genes for miR-342-3p from TargetScan (Figure 3A & 3B)
- CB1_CB3_CB4_final.rds: Seurat object containing scRNA-seq data (Figure 5 & S5)
- TCGA_phenotype.tsv.gz: TCGA patient phenotype data (metadata) with clinical and demographic information (TCGA Analysis)
- Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, providing overall survival (OS) status and time (TCGA Analysis)
- pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz: TCGA pancancer miRNA expression data (FPKM values) across different tumor samples (TCGA Analysis)
- environment_singlecell5.yml: contains the conda virtual environment that can be used to install all the dependencies.
- scripts: folder with R scripts used for the analyses
- CB2025_figure_1_RNAseq.R
- CB2025_figure_3_RNAseq.R
- CB2025_figure_5_scRNAseq.R
- TCGA_analysis.R
- Output: results of the analyses
- CB_Annotation.final.CB_Fig.5A.csv: scRNA-seq source data to reproduce the UMAP in Figure 5A
- Input: input files required to generate the figures
- miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx: UMI and gene count data from RNA-seq (Figure 1B)
- miRNA_QIAseq_1509_181342_edgeR_results.xlsx: Differential expression analysis results (Figure 1C.sx)
- miRNA_QIAseq_1510_173308.all_samples.summary.xlsx: Summary data for miRNA and piRNA (Figure 1C.dx & 1D)
- miRNA_Family.xlsx: miRNA family information (Figure 1D)
- miDB_sig5.MLS.rds: reference files for GSEA analysis (Figure 3 & 5)
- RNAseq_90-857433247_edgeR_results.xlsx: Differential gene expression analysis for miR-342 vs. control and spongeBT vs. control (Figure 3A & 3B & 3C & 3D)
- TargetScan8.0_miR-342-3p.predicted_targets.xlsx: Predicted target genes for miR-342-3p from TargetScan (Figure 3A & 3B)
- CB1_CB3_CB4_final.rds: Seurat object containing scRNA-seq data (Figure 5 & S5) (present on ORDR)
- TCGA_phenotype.tsv.gz: TCGA patient phenotype data (metadata) with clinical and demographic information (TCGA Analysis)
- Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, providing overall survival (OS) status and time (TCGA Analysis)
- pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz: TCGA pancancer miRNA expression data (FPKM values) across different tumor samples (TCGA Analysis) (present on ORDR)
## scRNAseq analysis
The initial preprocessing of the data, including mapping against the _Mus musculus_ GRCm38 reference genome and gene counting, was done using the 10x Genomics Cell Ranger Software (v7.2.0) using default parameters. The resulting data were imported into R and analyzed with the Seurat package (v5.0.1).
The initial preprocessing of the data, including mapping against the *Mus musculus* GRCm38 reference genome and gene counting, was done using the 10x Genomics Cell Ranger Software (v7.2.0) using default parameters. The resulting data were imported into R and analyzed with the Seurat package (v5.0.1).
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment