"""
Prediction wrapper: provides a predict function that calls the netMHCstabpan executable to run predictions.
"""

import os
import sys
from subprocess import Popen, PIPE
import tempfile 
import logging
# Configure root logging with a WARNING threshold and create the logger used
# by this module.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

# The netMHCstabpan executable is looked up in the directory that contains
# this module.
# NOTE(review): the directory is stored relative to the working directory at
# import time -- confirm nothing changes directory before predict() is called.
EXECUTABLE_NAME = 'netMHCstabpan'
EXECUTABLE_DIR_PATH = os.path.relpath(os.path.split(__file__)[0], os.curdir)
EXECUTABLE_FULL_PATH = os.path.join(EXECUTABLE_DIR_PATH, EXECUTABLE_NAME)

def predict(allele_name, binding_length, peptide, format='fasta'):
    """Run netMHCstabpan for one allele and return the scores it reports.

    Args:
        allele_name: MHC allele name, e.g. 'HLA-A*01:01' or 'Mamu-A1*04003'.
            The asterisk is rewritten before the name is passed with '-a'.
        binding_length: Peptide length passed with '-l'. Only used when
            format is 'fasta'.
        peptide: When format is 'fasta', the sequence to scan; it is written
            to a temporary FASTA file. When format is 'peptide', the name of
            a peptide file that is passed to the executable with '-p'.
        format: 'fasta' (default) or 'peptide', case-insensitive.

    Returns:
        A list of floats, one per result row, read from the 6th
        whitespace-separated column of the output (Thalf(h)).

    Raises:
        ValueError: If format is not accepted, or if in 'fasta' mode the
            number of scores differs from
            len(peptide) - binding_length + 1.
        Exception: If the executable exits with a non-zero status.
    """
    accepted_formats = ['fasta', 'peptide']
    format = format.lower()
    if format not in accepted_formats:
        raise ValueError('format must be one of {}'.format(accepted_formats))

    logger.info(
        "netMHCstabpan predicting: allele=%s, length=%s, peptide=%s, format=%s",
        allele_name, binding_length, peptide, format)

    # Eliminate the asterisk (*) from the allele_name:
    # Mamu-A1*04003 => Mamu-A1:04003 or SLA-1*1201 => SLA-1:1201
    if allele_name[:3] in ('Mam', 'SLA'):
        allele_name = allele_name.replace('*', ':')
    # HLA-A*01:01 => HLA-A01:01 or Patr-B*1401 => Patr-B1401
    else:
        allele_name = allele_name.replace('*', '')

    infile = None
    try:
        if format == 'peptide':
            # peptide is a file name when format == 'peptide'
            cmd = [EXECUTABLE_FULL_PATH, '-a', allele_name, '-p', peptide]
        else:
            # Make a file with the peptide as expected by netMHCstabpan.
            infile = tempfile.NamedTemporaryFile(
                suffix='-netmhcstabpan-input', mode='w')
            logger.info('input filename: %s', infile.name)
            infile_contents = ">test\n{}\n".format(peptide)
            logger.debug('infile contents:\n%s', infile_contents)
            infile.write(infile_contents)
            # Push the buffered contents to disk so the child process sees them.
            infile.flush()
            cmd = [EXECUTABLE_FULL_PATH, '-a', allele_name,
                   '-l', str(binding_length), infile.name]

        # Execute netMHCstabpan and collect its standard output.
        process = Popen(cmd, stdout=PIPE)
        stdoutdata, _ = process.communicate()
    finally:
        # Closing the temporary file deletes it; do this on every exit path so
        # a failed run does not leave the input file behind.
        if infile is not None:
            infile.close()

    output = stdoutdata.decode().splitlines()
    logger.debug("%s", "\n".join(output))

    if process.returncode != 0:
        msg = "Predictor did not execute.  Full command: %s\n" % cmd
        raise Exception(msg)

    # Result rows are the non-comment lines whose first field is a number.
    scores = []
    for line in output:
        fields = line.split()
        if line.startswith('#') or not fields or not fields[0].isdigit():
            continue
        scores.append(float(fields[5]))  # column Thalf(h) is the 6th column

    if format == 'fasta':
        # One score is expected per window of binding_length over the sequence.
        expected_count = len(peptide) - int(binding_length) + 1
        if len(scores) != expected_count:
            raise ValueError("Sth wrong makes error as the num of scores not correct.")
    return scores

# Integer lengths 8 through 14.
# NOTE(review): presumably the peptide lengths the predictor supports;
# predict() does not check against this tuple -- confirm with callers.
allowed_binding_lengths = tuple(range(8, 15))

# The allele list is read at import time from a text file located in the same
# directory as the executable; names are separated by whitespace.
ALLELE_FILE_NAME = 'MHC_allele_names.txt'
ALLELE_FULL_PATH = os.path.join(EXECUTABLE_DIR_PATH, ALLELE_FILE_NAME)
with open(ALLELE_FULL_PATH, 'r') as allele_name_file:
    allele_names = allele_name_file.read()
allowed_allele_names = allele_names.split()
