'''
Run MHCflurry predictor on specified peptides.

By default, the presentation predictor is used, and predictions for
MHC I binding affinity, antigen processing, and the composite presentation score
are returned. If you just want binding affinity predictions, pass
--affinity-only.

Examples:

Write a CSV file containing the contents of INPUT.csv plus additional columns
giving MHCflurry predictions:

$ mhcflurry-predict INPUT.csv --out RESULT.csv

The input CSV file is expected to contain columns "allele", "peptide", and,
optionally, "n_flank", and "c_flank".

If `--out` is not specified, results are written to stdout.

You can also run on alleles and peptides specified on the command line, in
which case predictions are written for *all combinations* of alleles and
peptides:

$ mhcflurry-predict --alleles HLA-A0201 H-2Kb --peptides SIINFEKL DENDREKLLL

Instead of individual alleles (in a CSV or on the command line), you can also
give a comma-separated list of alleles giving a sample genotype. In this case,
the tightest binding affinity across the alleles for the sample will be
returned. For example:

$ mhcflurry-predict --peptides SIINFEKL DENDREKLLL \
    --alleles \
        HLA-A*02:01,HLA-A*03:01,HLA-B*57:01,HLA-B*45:01,HLA-C*02:01,HLA-C*07:02 \
        HLA-A*01:01,HLA-A*02:06,HLA-B*44:02,HLA-B*07:02,HLA-C*01:01,HLA-C*03:01

will give the tightest predicted affinities across alleles for each of the two
genotypes specified for each peptide.
'''
from __future__ import (
    print_function,
    division,
    absolute_import,
)
import sys
import itertools
import logging
import os

import pandas

from .downloads import get_default_class1_presentation_models_dir
from .class1_affinity_predictor import Class1AffinityPredictor
from .class1_presentation_predictor import Class1PresentationPredictor
from .version import __version__

def predict_peptides(
        peptides,
        alleles,
        no_throw=False,
        peptide_column="peptide",
        allele_column="allele",
        no_affinity_percentile=False,
        no_flanking=False,
        always_include_best_allele=False,
        output_path=None,
        include_individual_model_predictions=False,
        prediction_column_prefix="mhcflurry_",
        output_delimiter=",",
        affinity_only=True):
    """
    Predict for all combinations of the given alleles and peptides and write
    the results as delimited text.

    The default models directory is loaded as a presentation predictor when
    it contains a "weights.csv" file; otherwise it is loaded as an affinity
    predictor wrapped in a Class1PresentationPredictor, and `affinity_only`
    is forced to True (with a warning if the caller passed False).

    Parameters
    ----------
    peptides : sequence of str
        Peptides to predict for.
    alleles : sequence of str
        Allele strings. Each string is split on commas and/or whitespace, so
        a single entry may name several alleles (e.g. a genotype).
    no_throw : bool
        Passed to the predictor as ``throw=not no_throw``.
    peptide_column : str
        Name of the peptide column in the output table.
    allele_column : str
        Name of the allele column in the output table. Must differ from
        `peptide_column`.
    no_affinity_percentile : bool
        Passed to the predictor as
        ``include_affinity_percentile=not no_affinity_percentile``.
    no_flanking : bool
        Accepted for interface compatibility; not used by this function
        (flanking sequences are always passed as None).
    always_include_best_allele : bool
        If False, the "best_allele" prediction column is dropped when every
        allele string names exactly one allele.
    output_path : str or None
        Destination file. If falsy, the table is written to stdout.
    include_individual_model_predictions : bool
        Accepted for interface compatibility; not used by this function.
    prediction_column_prefix : str
        Prefix prepended to each prediction column name in the output.
    output_delimiter : str
        Field separator used when writing the table.
    affinity_only : bool
        If True, call ``predict_affinity``; otherwise call ``predict``.
        Note that the default here is True.

    Raises
    ------
    ValueError
        If `allele_column` and `peptide_column` are the same name.
    """
    if allele_column == peptide_column:
        # A single column cannot hold both values; previously this silently
        # produced predictions computed from the wrong data.
        raise ValueError(
            "allele_column and peptide_column must differ, got %r for both"
            % (allele_column,))

    logging.getLogger('tensorflow').disabled = True

    models_dir = get_default_class1_presentation_models_dir(test_exists=True)

    if os.path.exists(os.path.join(models_dir, "weights.csv")):
        # Using a presentation predictor.
        predictor = Class1PresentationPredictor.load(models_dir)
    else:
        # Using just an affinity predictor.
        affinity_predictor = Class1AffinityPredictor.load(models_dir)
        predictor = Class1PresentationPredictor(
            affinity_predictor=affinity_predictor)
        if not affinity_only:
            logging.warning(
                "Specified models are an affinity predictor, which implies "
                "--affinity-only. Specify this argument to silence this warning.")
            affinity_only = True

    # Key the table by the configured column names: every access below goes
    # through allele_column / peptide_column, so hard-coded "allele" /
    # "peptide" keys would raise KeyError for any non-default name.
    pairs = list(itertools.product(alleles, peptides))
    df = pandas.DataFrame({
        allele_column: [allele for (allele, _) in pairs],
        peptide_column: [peptide for (_, peptide) in pairs],
    })
    logging.info(
        "Predicting for %d alleles and %d peptides = %d predictions",
        len(alleles), len(peptides), len(df))

    # Map each distinct allele string (used as the sample name) to the list
    # of individual alleles it contains.
    allele_string_to_alleles = (
        df.drop_duplicates(allele_column).set_index(
            allele_column, drop=False)[
                allele_column
        ].str.split(r"[,\s]+")).to_dict()

    if affinity_only:
        predictions = predictor.predict_affinity(
            peptides=df[peptide_column].values,
            alleles=allele_string_to_alleles,
            sample_names=df[allele_column],
            throw=not no_throw,
            include_affinity_percentile=not no_affinity_percentile)
    else:
        # No flanking sequences are available for command-line style input.
        predictions = predictor.predict(
            peptides=df[peptide_column].values,
            n_flanks=None,
            c_flanks=None,
            alleles=allele_string_to_alleles,
            sample_names=df[allele_column],
            throw=not no_throw,
            include_affinity_percentile=not no_affinity_percentile)

    # If each query is just for a single allele, the "best_allele" column
    # is redundant so we remove it.
    if not always_include_best_allele:
        if all(len(a) == 1 for a in allele_string_to_alleles.values()):
            del predictions["best_allele"]

    # Copy over the prediction columns, skipping those that merely repeat
    # the inputs.
    for col in predictions.columns:
        if col not in ("allele", "peptide", "sample_name", "peptide_num"):
            df[prediction_column_prefix + col] = predictions[col]

    if output_path:
        df.to_csv(output_path, index=False, sep=output_delimiter)
        print("Wrote: %s" % output_path)
    else:
        df.to_csv(sys.stdout, index=False, sep=output_delimiter)


def get_supported_alleles_lengths(alleles_format='list'):
    """
    Report the alleles and peptide lengths supported by the default models.

    The default models directory is loaded as a presentation predictor when
    it contains a "weights.csv" file; otherwise it is loaded as an affinity
    predictor wrapped in a Class1PresentationPredictor.

    Parameters
    ----------
    alleles_format : str
        'tsv' to get a single string with one "allele<TAB>lengths" line per
        supported allele; any other value (default 'list') to get a list of
        (allele, lengths) tuples.

    Returns
    -------
    str or list of (allele, str) tuples
        In both forms, lengths is a comma-separated string covering the
        predictor's supported peptide length range, inclusive at both ends.
    """
    models_dir = get_default_class1_presentation_models_dir(test_exists=True)

    if os.path.exists(os.path.join(models_dir, "weights.csv")):
        # Using a presentation predictor.
        predictor = Class1PresentationPredictor.load(models_dir)
    else:
        # Using just an affinity predictor. There is no affinity_only option
        # in this function; referencing it here used to raise
        # UnboundLocalError whenever this branch was taken.
        affinity_predictor = Class1AffinityPredictor.load(models_dir)
        predictor = Class1PresentationPredictor(
            affinity_predictor=affinity_predictor)

    # supported_peptide_lengths is indexed as (min, max); +1 makes the upper
    # bound inclusive.
    peptide_lengths = ",".join(str(x) for x in range(
        predictor.supported_peptide_lengths[0],
        predictor.supported_peptide_lengths[1] + 1))

    if alleles_format == 'tsv':
        return "\n".join(
            "%s\t%s" % (allele, peptide_lengths)
            for allele in predictor.supported_alleles)
    return [
        (allele, peptide_lengths) for allele in predictor.supported_alleles
    ]