"""Extract data in 10x format for scVAR.

Reads an expression CSV and a variant CSV and writes each one as a sparse
Matrix Market file plus plain-text label lists into an output directory.

Both CSVs are read with their first column as the row index. Rows become
the matrix rows (written as features / variants) and the column headers
are written as barcodes.
"""

import argparse
import os

import pandas as pd
from scipy import io, sparse


def _write_matrix(table, path):
    """Write the values of DataFrame *table* to *path* in Matrix Market format."""
    io.mmwrite(path, sparse.csr_matrix(table.values))


def _write_labels(labels, path):
    """Write *labels* to *path*, one per line, with no header and no index."""
    pd.Series(labels).to_csv(path, index=False, header=False)


def convert_expression(expr_csv, out_dir):
    """Convert an expression CSV into ``matrix.mtx`` and companion files.

    Files written into *out_dir*:

    * ``matrix.mtx`` -- the table values as a sparse Matrix Market matrix.
    * ``clean_barcodes.txt`` -- the CSV column headers, one per line.
    * ``features.tsv`` -- three tab-separated columns per row label:
      the label, the label again, and the literal ``Gene Expression``.

    Args:
        expr_csv: Path to the expression CSV; its first column is used as
            the row index.
        out_dir: Directory to write into; created if it does not exist.
    """
    print("🔄 Converting expression data...")
    expr = pd.read_csv(expr_csv, index_col=0)

    # Created here as well as in main() so the function also works when it
    # is called on its own with a directory that does not exist yet.
    os.makedirs(out_dir, exist_ok=True)

    # Save matrix in Matrix Market format
    _write_matrix(expr, os.path.join(out_dir, "matrix.mtx"))

    # Save barcodes
    _write_labels(expr.columns, os.path.join(out_dir, "clean_barcodes.txt"))

    # Save features.tsv in 10X format: gene_id\tgene_name\tGene Expression
    # The row label is used for both the id and the name column.
    features = pd.DataFrame(
        {
            "gene_id": expr.index,
            "gene_name": expr.index,
            "feature_type": "Gene Expression",
        }
    )
    features.to_csv(
        os.path.join(out_dir, "features.tsv"),
        sep="\t",
        index=False,
        header=False,
    )

    print("✅ Expression conversion complete!")


def convert_variants(variant_csv, out_dir):
    """Convert a variant CSV into a Matrix Market matrix and label lists.

    Files written into *out_dir*:

    * ``consensus_filtered_markdup.mtx`` -- the table values as a sparse
      Matrix Market matrix.
    * ``barcodes_var.tsv`` -- the CSV column headers, one per line.
    * ``variants_filtered_markdup.txt`` -- the row labels, one per line.

    Args:
        variant_csv: Path to the variant CSV; its first column is used as
            the row index.
        out_dir: Directory to write into; created if it does not exist.
    """
    print("🔄 Converting variant data...")
    var = pd.read_csv(variant_csv, index_col=0)

    # See convert_expression: makes standalone calls safe.
    os.makedirs(out_dir, exist_ok=True)

    # Save matrix in Matrix Market format
    _write_matrix(var, os.path.join(out_dir, "consensus_filtered_markdup.mtx"))

    # Save barcodes
    _write_labels(var.columns, os.path.join(out_dir, "barcodes_var.tsv"))

    # Save variant list
    _write_labels(var.index, os.path.join(out_dir, "variants_filtered_markdup.txt"))

    print("✅ Variant conversion complete!")


def main(argv=None):
    """Parse command-line arguments and run both conversions.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Convert synthetic scRNA-seq and variant data into scVAR-compatible format."
    )
    parser.add_argument("--expression_csv", required=True, help="Path to expression.csv")
    parser.add_argument("--variant_csv", required=True, help="Path to variants.csv")
    parser.add_argument("--output_dir", required=True, help="Directory to save scVAR-compatible files")
    args = parser.parse_args(argv)

    os.makedirs(args.output_dir, exist_ok=True)

    convert_expression(args.expression_csv, args.output_dir)
    convert_variants(args.variant_csv, args.output_dir)


if __name__ == "__main__":
    main()