
#### Extract data in 10x format for scVAR

import pandas as pd
from scipy import io, sparse
import os

def convert_expression(expr_csv, out_dir):
    """Convert an expression CSV into 10x-style matrix/barcodes/features files.

    The CSV is read with its first column as the row index. Row labels are
    used as gene identifiers and the remaining column headers as barcodes.
    Three files are written into ``out_dir``:

    * ``matrix.mtx`` -- the table values as a sparse Matrix Market file,
      with the same row/column orientation as the CSV.
    * ``clean_barcodes.txt`` -- one column header per line.
    * ``features.tsv`` -- ``gene_id<TAB>gene_name<TAB>Gene Expression`` per
      row; the row label is used for both the id and the name.

    Args:
        expr_csv: Path to the expression CSV file.
        out_dir: Directory to write into; created if it does not exist.
    """
    print("🔄 Converting expression data...")
    expr = pd.read_csv(expr_csv, index_col=0)

    # Make the function usable on its own, not only via main(), which
    # happens to create the directory beforehand.
    os.makedirs(out_dir, exist_ok=True)

    # Save matrix in Matrix Market format
    expr_sparse = sparse.csr_matrix(expr.values)
    io.mmwrite(os.path.join(out_dir, "matrix.mtx"), expr_sparse)

    # Save barcodes. A tab separator is passed so that the default comma
    # dialect does not wrap labels containing a comma in quotes; with a
    # single column no separator is ever emitted.
    barcodes = pd.Series(expr.columns)
    barcodes.to_csv(
        os.path.join(out_dir, "clean_barcodes.txt"),
        sep="\t",
        index=False,
        header=False,
    )

    # Save features.tsv in 10X format: gene_id\tgene_name\tGene Expression
    features = pd.DataFrame({
        "gene_id": expr.index,
        "gene_name": expr.index,
        "feature_type": "Gene Expression",
    })
    features.to_csv(
        os.path.join(out_dir, "features.tsv"),
        sep="\t",
        index=False,
        header=False,
    )

    print("✅ Expression conversion complete!")

def convert_variants(variant_csv, out_dir):
    """Convert a variant CSV into a Matrix Market file plus label lists.

    The CSV is read with its first column as the row index. Row labels are
    used as variant identifiers and the remaining column headers as
    barcodes. Three files are written into ``out_dir``:

    * ``consensus_filtered_markdup.mtx`` -- the table values as a sparse
      Matrix Market file, with the same orientation as the CSV.
    * ``barcodes_var.tsv`` -- one column header per line.
    * ``variants_filtered_markdup.txt`` -- one row label per line.

    Args:
        variant_csv: Path to the variant CSV file.
        out_dir: Directory to write into; created if it does not exist.
    """
    print("🔄 Converting variant data...")
    var = pd.read_csv(variant_csv, index_col=0)

    # Make the function usable on its own, not only via main(), which
    # happens to create the directory beforehand.
    os.makedirs(out_dir, exist_ok=True)

    # Save matrix in Matrix Market format
    var_sparse = sparse.csr_matrix(var.values)
    io.mmwrite(os.path.join(out_dir, "consensus_filtered_markdup.mtx"), var_sparse)

    # The two label lists are plain one-per-line text files. A tab separator
    # is passed so that the default comma dialect does not wrap labels that
    # contain a comma (e.g. "chr1:100:A,T") in quotes; with a single column
    # no separator is ever emitted.

    # Save barcodes
    barcodes = pd.Series(var.columns)
    barcodes.to_csv(
        os.path.join(out_dir, "barcodes_var.tsv"),
        sep="\t",
        index=False,
        header=False,
    )

    # Save variant list
    variants = pd.Series(var.index)
    variants.to_csv(
        os.path.join(out_dir, "variants_filtered_markdup.txt"),
        sep="\t",
        index=False,
        header=False,
    )

    print("✅ Variant conversion complete!")

def main():
    """Command-line entry point.

    Parses the three required options (``--expression_csv``,
    ``--variant_csv``, ``--output_dir``), makes sure the output directory
    exists, then runs the expression conversion followed by the variant
    conversion into that directory.
    """
    from argparse import ArgumentParser

    cli = ArgumentParser(
        description="Convert synthetic scRNA-seq and variant data into scVAR-compatible format."
    )

    # All options are mandatory; registration order is kept so the
    # generated --help output lists them in the same sequence.
    required_options = (
        ("--expression_csv", "Path to expression.csv"),
        ("--variant_csv", "Path to variants.csv"),
        ("--output_dir", "Directory to save scVAR-compatible files"),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)

    opts = cli.parse_args()

    destination = opts.output_dir
    os.makedirs(destination, exist_ok=True)

    convert_expression(opts.expression_csv, destination)
    convert_variants(opts.variant_csv, destination)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

