Commit 1ca1041f authored by Marco Monti's avatar Marco Monti
Browse files

I updated some scripts and added input files

parent 35c9bcca
......@@ -31,6 +31,7 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
#### Figure 1B ####
# Figure 1B input: sheet 3 of the workbook ("umis.genes.polyA-mouse").
# skip = 1 skips the first row of the sheet; the next row supplies the column names.
df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx"), sheet = 3, col_names = TRUE, skip = 1))
# CSV alternative to the Excel import above:
#df1 <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.csv")))
df1 <- df1[, -c(1, 3:6)] # keep only gene name and UMI counts
# Counts per million: each sample column is divided by its own total and multiplied by 1e6
df1[, -1] <- apply(df1[, -1], 2, function(x) {x / sum(x) * 1000000})
gene_vector <- c("Cd19", "Ms4a1", "Fcer2a", "Ighm", "Cd8a", "Xcr1", "Itgae", "Itgax",
......@@ -41,6 +42,7 @@ df2 <- df1[df1$gene%in%gene_vector,] %>% arrange(factor(gene, levels=gene_vector
# Use the gene names (first column) as row names so they survive the numeric transformations below
rownames(df2) <- df2[, 1]
# Row-wise Z-score: scale() standardises columns, so transpose, scale, then transpose back
df2.scaled <- as.data.frame(t(scale(t(df2[-1]))))
openxlsx::write.xlsx(df2.scaled, paste0(output_dir, "/Figure_1B_table.xlsx"), rowNames = TRUE)
#write.csv(df2.scaled, paste0(output_dir, "/Figure_1B_table.csv"), row.names=T)
# the df was exported and used to create a heatmap using Graphpad
......@@ -53,6 +55,8 @@ for (contr in getSheetNames(ff)) {
dge_res[[contr]] <- read.xlsx(ff, rowNames = T, sheet = contr)
}
names(dge_res)
sample_order <- c("RPM", "cDC1", "cDC2", "B220+", "LSEC", "KC")
# Plot DGE volcano
......@@ -122,6 +126,7 @@ ggsave(filename = paste(output_dir, "edgeR_DGE_res_volcano_Fig.3C.sx.pdf", sep =
#### Figure 1C.dx ####
# Figure 1C.dx input: sheet 2 of the summary workbook ("miRNA_piRNA")
df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet = 2, col_names = TRUE))
# CSV alternative to the Excel import above:
#df1 <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.csv")))
df1 <- df1[, 1:7] # UMI
rownames(df1) <- df1$miRNA
# Natural log of the counts, column by column; the result is a matrix.
# NOTE(review): zero counts become -Inf here -- confirm the downstream plot handles that.
df2 <- apply(df1[, -1], 2, log)
......@@ -141,13 +146,15 @@ dev.off()
#### Figure 1D ####
# Figure 1D input: sheet 2 of the summary workbook ("miRNA_piRNA")
df1 <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.xlsx"), sheet = 2, col_names = TRUE))
# CSV alternative to the Excel import above:
#df1 <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_QIAseq_1510_173308.all_samples.summary.csv")))
# Remove piRNA rows and keep columns 1-7 (UMI).
# Logical negation is used rather than -which(): if no name matched "piR",
# -which() would evaluate to integer(0) and silently drop every row.
df1 <- df1[!grepl("piR", df1[, 1]), 1:7]
df1[, 1] <- gsub("/.*", "", df1[, 1]) # leave only first miRNA for ambiguous entries
# Counts per million: each sample column is divided by its own total (NAs ignored) and multiplied by 1e6
df1[, -1] <- apply(df1[, -1], 2, function(x) {x / sum(x, na.rm = TRUE) * 1000000})
df.families <- as.data.frame(read_excel(paste0(input_dir, "/miRNA_Family.xlsx"), sheet = 1, col_names = TRUE)) # miRNA families from miRBase
# CSV alternative to the Excel import above:
#df.families <- as.data.frame(read.csv(paste0(input_dir, "/miRNA_Family.csv")))
df.families <- df.families[df.families[, 3] == 10090, c(4, 1)] # select mouse entries
# Truncate each name right after its first "p" (drops anything that follows)
df.families[, 1] <- sub(pattern = "p.*", replacement = "p", x = df.families[, 1])
# Join family annotation and counts on the first column of each table
df2 <- merge(df.families, df1, by = 1)
df2 <- aggregate(df2[, -c(1:2)], by = df2["miR family"], FUN = sum, na.rm = TRUE) # sum counts by family
gene_vector <- c("miR-150-5p","miR-25-3p/32-5p/92-3p/363-3p/367-3p","miR-142-3p.1",
"miR-17-5p/20-5p/93-5p/106-5p","miR-191-5p",
......@@ -158,4 +165,5 @@ df3 <- subset(df2, df2$`miR family`%in% gene_vector) %>% arrange(factor(`miR fam
# Use the miR family names (first column) as row names
rownames(df3) <- df3[, 1]
# Row-wise Z-score: scale() standardises columns, so transpose, scale, then transpose back
df3.scaled <- as.data.frame(t(scale(t(df3[-1]))))
# Export df3.scaled (the Figure 1D matrix computed just above), not df2.scaled,
# which holds the Figure 1B table.
openxlsx::write.xlsx(df3.scaled, paste0(output_dir, "/Figure_1D_table.xlsx"), rowNames = TRUE)
#write.csv(df3.scaled, paste0(output_dir, "/Figure_1D_table.csv"), row.names=T)
# The df was exported and used to create a heatmap using Graphpad
......@@ -41,6 +41,9 @@ output_dir <- "/beegfs/scratch/ric.squadrito/ric.squadrito/90-935462466_scRNAseq
# Import df
# Read the two contrast sheets from the edgeR results workbook
miR_ctrl <- read_excel(
  path = paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"),
  sheet = "miR342-control"
)
sponge_ctrl <- read_excel(
  path = paste0(input_dir, "/RNAseq_90-857433247_edgeR_results.xlsx"),
  sheet = "spongeBT-control"
)
# CSV alternatives to the Excel imports above:
#miR_ctrl <- as.data.frame(read.csv(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results_miR342-control.csv")))
#sponge_ctrl <- as.data.frame(read.csv(paste0(input_dir, "/RNAseq_90-857433247_edgeR_results_spongeBT-control.csv")))
# Volcano plot annotation: start with every gene flagged as not differentially expressed
miR_ctrl[["DEG"]] <- "NO"
......@@ -86,6 +89,7 @@ ggsave(filename = paste0(output_dir, "/Volcano_plot_3B.pdf"), plot=p2, width=7,
# Import miR-342-3p target list (from TargetScan)
miR342_targets <- read_excel(
  path = paste0(input_dir, "/TargetScan8.0_miR-342-3p.predicted_targets.xlsx")
)
# CSV alternative to the Excel import above:
#miR342_targets <- as.data.frame(read.csv(paste0(input_dir, "/TargetScan8.0_miR-342-3p.predicted_targets.csv")))
# Keep only targets whose cumulative weighted context++ score is below -0.3
miR342_targets <- filter(
  miR342_targets,
  `Cumulative weighted context++ score` < (-0.3)
)
# ecdf plot (right panel)
......
......@@ -101,6 +101,7 @@ var_list <- c("UMAPh_1", "UMAPh_2","PROP.Group","orig.ident", "Sample","RNA_snn_
# Extract the variables listed in var_list from obs1 into a data frame
obs1_df <- FetchData(object = obs1, vars = var_list)
# Copy the row names into an explicit cell_ID column so they are kept in the exported tables
obs1_df[["cell_ID"]] <- rownames(obs1_df)
# Write the same table twice: as an Excel workbook and as CSV
write.xlsx(
  x = obs1_df,
  file = paste0(output_dir, "/CB_Annotation.final.CB_Fig.5A.xlsx"),
  overwrite = TRUE
)
write.csv(
  x = obs1_df,
  file = paste0(output_dir, "/CB_Annotation.final.CB_Fig.5A.csv")
)
# Dotplot with markers for population (Suppl.Fig.5A)
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -2,16 +2,18 @@
**Reprogramming liver metastasis-associated macrophages towards an anti-tumoral phenotype through enforced miR-342 expression**
Bresesti C, Monti M, Beretta S, Merelli I, Squadrito M, _et al._; 2025
[Chiara Bresesti](https://orcid.org/0000-0002-1840-9774), [Marco Monti](https://orcid.org/0000-0003-1266-4325), [Stefano Beretta](https://orcid.org/0000-0003-4375-004X), [Ivan Merelli](https://orcid.org/0000-0003-3587-3680), [Mario Leonardo Squadrito](https://orcid.org/0000-0002-1188-0299), *et al.*; Cell Reports, 2025 <https://doi.org/10.1016/j.celrep.2025.115592>
Corresponding Author: Mario Leonardo Squadrito. Email: squadrito.mario@hsr.it. [![Twitter URL](https://img.shields.io/twitter/url/https/twitter.com/wouter_decoster.svg?style=social&label=Follow%20%40Mariosqua)](https://x.com/Mariosqua)
Corresponding Author: Mario Leonardo Squadrito. Email: [squadrito.mario\@hsr.it](mailto:squadrito.mario@hsr.it){.email}. [![Twitter URL](https://img.shields.io/twitter/url/https/twitter.com/wouter_decoster.svg?style=social&label=Follow%20%40Mariosqua)](https://x.com/Mariosqua)
GEO:
[GSE274043](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274043) (single-cell RNA-seq)
[GSE274044](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274044) (RNA-seq on iKCs)
[GSE274045](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274045) (small RNA-seq on splenic and hepatic cell populations)
Raw data are on GEO:
[GSE274043](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274043) (single-cell RNA-seq)\
[GSE274044](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274044) (RNA-seq on iKCs)\
[GSE274045](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274045) (small RNA-seq on splenic and hepatic cell populations)\
[GSE274046](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE274046) (bulk RNA-seq on splenic and hepatic cell populations)
Other input data are available on the Open Research Data Repository (ORDR):
DOI: [10.17632/4gpbv5vpcr.1](https://doi.org/10.17632/4gpbv5vpcr.1)
## Directories and Files
......@@ -22,6 +24,7 @@ GEO:
- CB2025_figure_5_scRNAseq.R
- TCGA_analysis.R
- Output: results of the analyses
- CB_Annotation.final.CB_Fig.5A.csv: scRNA-seq source data to reproduce the UMAP in Figure 5A
- Input: input files required to generate the figures
- miRNA_QIAseq_1509_QIAseqUltraplexRNA_181342.xlsx: UMI and gene count data from RNA-seq (Figure 1B)
- miRNA_QIAseq_1509_181342_edgeR_results.xlsx: Summary data for miRNA and piRNA (Figure 1C.dx & 1D)
......@@ -30,12 +33,11 @@ GEO:
- miDB_sig5.MLS.rds: reference files for GSEA analysis (Figure 3 & 5)
- RNAseq_90-857433247_edgeR_results.xlsx: Differential gene expression analysis for miR-342 vs. control and spongeBT vs. control (Figure 3A & 3B & 3C & 3D)
- TargetScan8.0_miR-342-3p.predicted_targets.xlsx: Predicted target genes for miR-342-3p from TargetScan (Figure 3A & 3B)
- CB1_CB3_CB4_final.rds: Seurat object containing scRNA-seq data (Figure 5 & S5)
- CB1_CB3_CB4_final.rds: Seurat object containing scRNA-seq data (Figure 5 & S5) (present on ORDR)
- TCGA_phenotype.tsv.gz: TCGA patient phenotype data (metadata) with clinical and demographic information (TCGA Analysis)
- Survival_SupplementalTable_S1_20171025_xena_sp: TCGA patient survival data, providing overall survival (OS) status and time (TCGA Analysis)
- pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz: TCGA pancancer miRNA expression data (FPKM values) across different tumor samples (TCGA Analysis)
- pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs_08_04_16.xena.gz: TCGA pancancer miRNA expression data (FPKM values) across different tumor samples (TCGA Analysis) (present on ORDR)
## scRNAseq analysis
The initial preprocessing of the data, including mapping against the _Mus musculus_ GRCm38 reference genome and gene counting, was done using the 10x Genomics Cell Ranger Software (v7.2.0) using default parameters. The resulting data were imported into R and analyzed with the Seurat package (v5.0.1).
The initial preprocessing of the data, including mapping against the *Mus musculus* GRCm38 reference genome and gene counting, was done using the 10x Genomics Cell Ranger Software (v7.2.0) using default parameters. The resulting data were imported into R and analyzed with the Seurat package (v5.0.1).
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment