"""Run the scVAR transcriptomic/variant integration pipeline for one sample.

Usage: python <this_script> <sample>

Reads the sample's inputs from ``<BASE_DIR>/input/<sample>/``, runs the
transcriptomic analysis, the variant analysis and their integration, computes
clusters at several resolutions, attaches externally produced metadata,
pickles the resulting object and writes UMAP plots to
``<BASE_DIR>/output/<sample>/``.
"""

# Import all dependencies from the scVAR environment (see installation
# instructions).
# NOTE(review): the scVAR import below is commented out in the original, yet
# transcriptomicAnalysis, variantAnalysis, omicsIntegration and
# calcOmicsClusters are called further down and are not defined in this file.
# Confirm how they are brought into scope, or re-enable the import.
# from scVAR import *
import sys
import pickle
import os

import scanpy as sc
import pandas as pd

# Root folder holding the per-sample ``input/`` and ``output/`` directories.
BASE_DIR = '/CNRITB/lcelli/SCVAR/BLL_August'

# Clustering resolutions; shared by the clustering and the plotting stages so
# the plotted columns always match the computed ones.
RESOLUTIONS = (0.01, 0.05, 0.5)

# Omic keys passed to calcOmicsClusters, in the order they are computed.
OMIC_KEYS = ('variant', 'trans', 'int')

# Columns copied from metadata.csv into adata.obs and plotted afterwards.
METADATA_COLUMNS = (
    'SingleR_DatabaseImmuneCellExpressionData_labels',
    'orig.ident',
    'Phase',
    'RNA_snn_res.0.6',
)


def _add_metadata(adata, metadata_csv):
    """Copy METADATA_COLUMNS from *metadata_csv* into ``adata.obs``.

    The CSV's first column is used as the index. Every barcode in
    ``adata.obs.index`` must be present in it; a missing one raises KeyError.
    """
    md = pd.read_csv(metadata_csv, header=0, index_col=0)
    md = md.loc[adata.obs.index]
    for column in METADATA_COLUMNS:
        adata.obs[column] = md[column]
    # Cast the cluster ids to str (presumably so they are plotted as discrete
    # categories rather than on a continuous scale -- TODO confirm).
    adata.obs['RNA_snn_res.0.6'] = adata.obs['RNA_snn_res.0.6'].astype(str)
    return adata


def _save_umap(adata, color, title, png_path):
    """Plot the 'int_umap' embedding coloured by *color* and save it as PNG."""
    fig = sc.pl.embedding(
        adata,
        basis='int_umap',
        color=color,
        title=[title],
        size=10,
        frameon=False,
        return_fig=True,
    )
    fig.savefig(png_path)


def _plot_all(adata, sample, out_path):
    """Write one UMAP per (resolution, omic) clustering and per metadata column."""
    sc.set_figure_params(
        scanpy=True, dpi=80, dpi_save=150, frameon=True, vector_friendly=True,
        fontsize=14, figsize=(12, 8), color_map=None, format='png',
        facecolor='#FFFFFF', transparent=False, ipython_format='png2x',
    )

    for res in RESOLUTIONS:
        for omic in OMIC_KEYS:
            _save_umap(
                adata,
                color=omic + '_clust_' + str(res),
                title=sample + ' UMAP:int Cluster:' + omic + ' res' + str(res),
                png_path=(out_path + sample + '_INT_umap_' + omic
                          + '_cluster_res' + str(res) + '.png'),
            )

    for column in METADATA_COLUMNS:
        _save_umap(
            adata,
            color=column,
            title=sample + ' UMAP:int ' + column,
            png_path=out_path + sample + '_INT_umap_' + column + '.png',
        )


def main():
    """Run the whole pipeline for the sample named in ``sys.argv[1]``."""
    # Check that the sample name was supplied.
    if len(sys.argv) < 2:
        print("Errore: specify sample name as argument.", file=sys.stderr)
        sys.exit(1)

    sample = sys.argv[1]
    out_path = BASE_DIR + '/output/' + sample + '/'
    in_path = BASE_DIR + '/input/' + sample + '/'

    # Create the output folder; exist_ok makes a prior existence check
    # unnecessary.
    os.makedirs(out_path, exist_ok=True)

    print('Start Analysis', sample)

    # Transcriptomics input files.
    tra_mat = in_path + 'matrix.mtx'
    barcode_tra = in_path + 'clean_barcodes.txt'
    feature = in_path + 'features.tsv'

    # Genomic (variant) input files.
    var_mat = in_path + 'consensus_filtered_markdup.mtx'
    barcode_var = in_path + 'barcodes_var.tsv'
    snv = in_path + 'variants_filtered_markdup.txt'

    # Analyze transcriptomics and genomics separately.
    adata = transcriptomicAnalysis(
        matrix_path=tra_mat,
        bcode_path=barcode_tra,
        feature_path=feature,
        bcode_variants=barcode_var,
    )
    adata = variantAnalysis(
        adata, matrix_path=var_mat, bcode_path=barcode_var, variants_path=snv
    )

    # Perform data integration.
    adata = omicsIntegration(adata)

    # Compute transcriptomics, genomics and integrated clusters at different
    # resolutions.
    for res in RESOLUTIONS:
        for omic in OMIC_KEYS:
            adata = calcOmicsClusters(adata, omic_key=omic, res=res)

    # Add metadata from other scRNA-seq analysis tools (i.e., Seurat). The
    # file lives in the same per-sample input folder as the matrices.
    adata = _add_metadata(adata, in_path + 'metadata.csv')

    # Save all data.
    with open(out_path + sample + '_adata.pkl', 'wb') as f:
        pickle.dump(adata, f)

    # Perform preliminary plotting.
    _plot_all(adata, sample, out_path)


if __name__ == '__main__':
    main()