#!/usr/bin/env python3

# Root directory for every database handled by the rules in this file,
# read from the workflow configuration key "database".
db_dir = config["database"]

# Download the geNomad database by running `genomad download-database`
# against db_dir. The declared output is the path genomad_db/genomad_db
# below db_dir (presumably created by the download itself -- TODO confirm
# against the geNomad version pinned in genomad.yaml).
rule genomad_database:
    output:
        os.path.join(db_dir, "genomad_db/genomad_db")
    conda:
        os.path.join(CONDAENV, "genomad.yaml")
    params:
        # The download command is pointed at the database root itself.
        database_dir = db_dir
    shell:
        "mkdir -p {params.database_dir} "
        "&& genomad download-database {params.database_dir} "

# Populate <db_dir>/eggNOG/ by running `download_eggnog_data.py` with that
# directory as --data_dir. The whole directory is the declared output.
# NOTE(review): `-y` presumably auto-confirms the script's prompts -- verify
# against the eggnog-mapper version pinned in eggnog.yaml.
rule eggNOGG_database:
    conda:
        os.path.join(CONDAENV, "eggnog.yaml")
    output:
        directory(os.path.join(db_dir, "eggNOG/"))
    shell:
        "mkdir -p {output} "
        "&& download_eggnog_data.py  "
        "--data_dir {output} -y "

# Download the GTDB-Tk release 220 data package, unpack it inside db_dir,
# delete the archive and touch a sentinel file.
#
# Outputs:
#   [0] <db_dir>/release220/      directory declared as holding the unpacked data
#   [1] <db_dir>/gtdbtk.downloaded  sentinel touched after a successful run
#
# The archive is not a declared output, so Snakemake does not clean it up
# after a failed or interrupted job. It is therefore written with `wget -O`
# to an explicit path: unlike `-P`, `-O` overwrites a stale partial archive
# instead of saving the retry as "<name>.1" and leaving the broken file to be
# picked up by tar.
rule GTDBTK_database:
    output:
        directory(os.path.join(db_dir, "release220/")),
        os.path.join(db_dir, "gtdbtk.downloaded")
    params:
        db = db_dir,
    shell:
        "mkdir -p {params.db} && "
        # URL path (including the "auxillary_files" spelling) is kept exactly as it was.
        "wget https://data.gtdb.ecogenomic.org/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz "
        "-O {params.db}/gtdbtk_r220_data.tar.gz && "
        "tar -C {params.db} -xvzf {params.db}/gtdbtk_r220_data.tar.gz && "
        "rm {params.db}/gtdbtk_r220_data.tar.gz && touch {output[1]}"

# Build the CAT/BAT database and taxonomy directories with `CAT prepare --fresh`.
#
# Outputs:
#   database  <db_dir>/catbat_db    passed to CAT prepare as -d
#   taxo      <db_dir>/catbat_taxo  passed to CAT prepare as -t
#
# The shell guard skips the build when the outputs are already present.
# Both outputs are directories, so the guard uses `-d`; a `-f` test (regular
# file) can never succeed on a directory. Both directories are checked so the
# build is only skipped when every declared output exists.
rule cat_database:
    output:
        database = directory(os.path.join(db_dir, "catbat_db")),
        taxo = directory(os.path.join(db_dir, "catbat_taxo")),
    threads:
        10
    conda:
        os.path.join(CONDAENV,"catbat.yaml")
    shell:
        "if [ -d {output.database} ] && [ -d {output.taxo} ]; then "
        "echo CAT database found ; "
        "else "
        "CAT prepare --fresh "
        "-n {threads} "
        "-d {output.database} "
        "-t {output.taxo} ; "
        "fi "
