#coding: utf-8
# Generic/Built-in
import sys
import os
import gzip
import glob
import yaml

# Other Libs
from snakemake.logging import logger


# Owned
import scripts.parserconfig as conf
import scripts.cmdparser as cmdparser
# Resolve rule ambiguity between the binning mapping rules and bowtie2_map:
# depending on the configured binning strategy, the matching binning mapping
# rule is given precedence over bowtie2_map.
if config["Binning"]["strategies"] in ("SASB", "CASB"):
    ruleorder: binning_mapping_SB > bowtie2_map
if config["Binning"]["strategies"] in ("SACB", "CACB"):
    ruleorder: binning_mapping_CB > bowtie2_map
#########################################################################
# conf.parse (see scripts/parserconfig.py) parses the config file and returns:
#    reads2use : dictionary with sample names as keys and, as values, the
#    list of paths to use for motus, assembly, etc.
#
#    i.e. it redirects some rules' input toward raw files,
#    pre-processing output and/or merging output.
#
#    samples : dictionary with sample names as keys
#    and paths of the raw fastq files as values.
#
#    deconta : dictionary with species names (targets
#    for contamination streaming) as keys and paths to the index as values.
#########################################################################

# Directory layout. env_dir and tmpdir are also defined in the main Snakefile;
# they are repeated here so this file can be read on its own.
env_dir = "../envs"
working_dir = config["project"]
tmpdir = os.path.join(working_dir, "tmp")
logdir = os.path.join(working_dir, "logs")

reads2use, samples, deconta = conf.parse(config)

# Split the samples dictionary in two by the number of entries per sample:
# two entries -> paired-end data, one entry -> single-end data.
PE_samples = {name: entries for name, entries in samples.items() if len(entries) == 2}
SE_samples = {name: entries for name, entries in samples.items() if len(entries) == 1}

# Values read straight from the config.
wd = config["project"]
tmp = config["temp"]


# Path to the conda environment definitions.
CONDAENV = "../envs"

def get_PE_output(dico_samples):
    """
    List the filtered paired-end FASTQ files expected from fastP.

    For each sample key in ``dico_samples``, every element obtained by
    iterating its value is used as the run label, and three paths are
    produced per run (R1, R2 and unpaired), in that order.

    The paths are rooted at the module-level ``reads_PE_dir``, which is
    not defined in this file and must exist before the first path is built.

    Returns a list of path strings (empty when ``dico_samples`` is empty).
    """
    read_kinds = ('R1', 'R2', 'unpaired')
    return [
        os.path.join(reads_PE_dir,
            f'{sample}/{sample}_{run}_{kind}.filtered.fastq.gz')
        for sample in dico_samples
        for run in dico_samples[sample]
        for kind in read_kinds
    ]

def get_SE_output(dico_samples):
    """
    List the filtered single-end FASTQ files expected from fastP.

    For each sample key in ``dico_samples``, every element obtained by
    iterating its value is used as the run label and yields one path.

    The paths are rooted at the module-level ``reads_SE_dir``, which is
    not defined in this file and must exist before the first path is built.

    Returns a list of path strings (empty when ``dico_samples`` is empty).
    """
    return [
        os.path.join(reads_SE_dir,
            f'{sample}/{sample}_{run}_SE.filtered.fastq.gz')
        for sample in dico_samples
        for run in dico_samples[sample]
    ]

#ruleorder: bowtie2_map > binning_mapping

#########################################################
#               include subworkflow                     #
#########################################################


# Rule files pulled into this workflow. The SCAPP, gene_cat and annotations
# rule files are currently disabled (commented out).
# NOTE(review): several names used in this file are not defined in it
# (reads_PE_dir, reads_SE_dir, and the rules producing the *.checkpoint files
# consumed by the run_* rules below); presumably they come from these
# includes or from the main Snakefile -- TODO confirm.
#include: "rules/SCAPP.smk"
#include: "rules/gene_cat.smk"
include: "rules/assembly.smk"
include: "rules/binning.smk"
include: "rules/motus.smk"
include: "rules/QC.smk"
include: "rules/genomes_collection.smk"
include: "rules/databases.smk"
include: "rules/genes_collection.smk"
#include: "rules/annotations.smk"


###########################################################
#                         ALL                             #
###########################################################

# Target rule: requests the "finished_*" flag file of every pipeline stage.
# The gene catalogue flag is requested only when config["genes_collection"]
# is truthy; an empty list contributes no input file otherwise.
rule all:
    input:
        os.path.join(tmpdir, "finished_assembly"),
        os.path.join(tmpdir, "finished_binning"),
        os.path.join(tmpdir, "finished_QC"),
        os.path.join(tmpdir, "finished_motus"),
        [os.path.join(tmpdir, "finished_gene_cat")] if config["genes_collection"] else [],
        os.path.join(tmpdir, "finished_genomes_collection")
    priority: 5

###########################################################
#                       GENE CAT                          #
###########################################################
if config["genes_collection"]:
    # Flag rule: once the gene catalogue checkpoint exists, touch the
    # temporary "finished_gene_cat" marker requested by rule all.
    rule run_gene_cat:
        input:
            os.path.join(tmpdir, "gene_catalogue.checkpoint")
        output:
            temp(os.path.join(tmpdir, "finished_gene_cat"))
        shell:
            "touch {output}"


###########################################################
#                         MAGS                            #
###########################################################

# Flag rule: once the genome catalogue checkpoint exists, touch the temporary
# "finished_genomes_collection" marker.
rule run_genomes:
    input:
        # Disabled alternatives (kept for reference):
        #   genomes_collection_finished.checkpoint
        #   genomes_annotation_finished.checkpoint
        os.path.join(tmpdir, "genome_catalogue.checkpoint")
    output:
        temp(os.path.join(tmpdir, "finished_genomes_collection"))
    shell:
        "touch {output}"

# Flag rule: once the binning checkpoint exists, touch the temporary
# "finished_binning" marker.
rule run_binning:
    input:
        os.path.join(tmpdir, "binning_finished.checkpoint")
    output:
        temp(os.path.join(tmpdir, "finished_binning"))
    shell:
        "touch {output}"



###########################################################
#                       Assembly                          #
###########################################################

# Flag rule: once the assembly checkpoint exists, touch the temporary
# "finished_assembly" marker.
rule run_assembly:
    input:
        os.path.join(tmpdir, "assembly.checkpoint")
    output:
        temp(os.path.join(tmpdir, "finished_assembly"))
    shell:
        "touch {output}"

###########################################################
#                   mOTUs profiling                       #
###########################################################


# Flag rule: once the motus checkpoint exists, touch the temporary
# "finished_motus" marker. Scheduled with priority 5.
rule run_motus:
    input:
        os.path.join(tmpdir, "motus.checkpoint")
    output:
        temp(os.path.join(tmpdir, "finished_motus"))
    priority: 5
    shell:
        "touch {output}"


###########################################################
#                   PRE-PROCESSING                        #
###########################################################


# Flag rule for pre-processing: requires every filtered paired-end and
# single-end read file, the multiqc marker and the QC checkpoint, then touches
# the temporary "finished_QC" marker. Scheduled with priority 5.
rule run_QC:
    input:
        get_PE_output(PE_samples),
        get_SE_output(SE_samples),
        os.path.join(tmpdir, "finished_multiqc"),
        os.path.join(tmpdir, "QC.checkpoint")
    output:
        temp(os.path.join(tmpdir, "finished_QC"))
    priority: 5
    shell:
        "touch {output}"

# Flag rule: once the multiqc checkpoint exists, touch the temporary
# "finished_multiqc" marker consumed by run_QC. Scheduled with priority 0.
rule run_multiqc:
    input:
        os.path.join(tmpdir, "multiqc.checkpoint")
    output:
        temp(os.path.join(tmpdir, "finished_multiqc"))
    priority: 0
    shell:
        "touch {output}"
