import os
import sys
from subprocess import Popen, PIPE
import tempfile
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# File name of the netMHC executable launched by the functions below.
EXECUTABLE_NAME = 'netMHC'
# Directory containing this source file, expressed relative to the working
# directory that is current when this module is imported (os.path.relpath).
# NOTE(review): because the path is relative, it stops resolving if the process
# changes its working directory after import -- confirm callers never chdir.
EXECUTABLE_DIR_PATH = os.path.relpath(os.path.dirname(__file__))
# Path handed to Popen as the program to run: the executable is expected to
# sit next to this source file.
EXECUTABLE_FULL_PATH = os.path.join(EXECUTABLE_DIR_PATH, EXECUTABLE_NAME)

def predict(allele_name, binding_length, peptide, sequence_format='fasta'):
    """ @brief Runs netMHC on a single sequence and returns the scores it reports.
        The sequence is written to a temporary FASTA file (header '>test') which is
        passed to the executable at EXECUTABLE_FULL_PATH together with the '-a'
        (allele) and '-l' (length) options.

        @param allele_name MHC allele name; any '*' and ':' characters are removed
               before the name is passed to netMHC.
        @param binding_length Peptide length, passed to netMHC as a string.
        @param peptide Amino-acid sequence written to the temporary input file.
        @param sequence_format 'fasta' or 'peptide' (case-insensitive). The value is
               only validated; the input file is always written in FASTA form.
        @return Tuple of floats: the 13th whitespace-separated field of every result
                row (rows whose first field is a number), in output order.
        @raise ValueError if sequence_format is not one of the accepted formats.
        @raise Exception if netMHC exits with a non-zero status.
    """
    accepted_formats = ['fasta', 'peptide']
    sequence_format = sequence_format.lower()
    if sequence_format not in accepted_formats:
        raise ValueError('format must be one of {}'.format(accepted_formats))

    # Eliminate the asterisk (*) and colon (:) from the allele_name
    allele_name = allele_name.replace('*', '').replace(':', '')

    infile_contents = ">test\n{}\n".format(peptide)

    # The context manager guarantees the temporary input file is closed (and
    # thereby deleted) even when netMHC fails or its output cannot be parsed.
    with tempfile.NamedTemporaryFile(suffix='-ann-input', mode='w') as infile:
        logger.info('input filename: %s', infile.name)
        logger.debug('infile contents:\n%s', infile_contents)
        infile.write(infile_contents)
        # Push the buffered text to disk so the child process can read it.
        infile.flush()

        # Execute netMHC while the input file still exists.
        cmd = [EXECUTABLE_FULL_PATH, '-a', allele_name, '-l', str(binding_length), infile.name]
        process = Popen(cmd, stdout=PIPE)
        # stderr is not piped, so only stdout carries data here.
        stdoutdata, _ = process.communicate()

    if process.returncode != 0:
        msg = "ANNPredictor did not execute.  Full command: {}\n".format(cmd)
        raise Exception(msg)

    # Convert the captured bytes to str and keep only the result rows: skip
    # comment lines, blank lines and column headers (first field not numeric).
    result_rows = []
    for line in stdoutdata.decode().splitlines():
        fields = line.split()
        if line.startswith('#') or not fields or not fields[0].isdigit():
            continue
        result_rows.append(fields)

    # The scores returned by netMHC are ic50.
    return tuple(float(fields[12]) for fields in result_rows)

def predict_from_peptide_file(allele_name, binding_length, peptide_file_path):
    """ @brief Performs a prediction with a file containing a different peptide on each line as input.
        This is a special mode of netMHC that is used for efficient calculation of score distributions
        used in generating percentiles from raw scores.

        @param allele_name MHC allele name; any '*' and ':' characters are removed
               before the name is passed to netMHC.
        @param binding_length Peptide length, passed to netMHC as a string.
        @param peptide_file_path Path of the peptide file, passed with the '-p' option.
        @return List of strings: the 13th whitespace-separated field of every result
                row (rows whose first field is a number), in output order.

        NOTE: the exit status of netMHC is not inspected here, so a failed run is
        not reported as an error by this function.
    """
    # Eliminate the asterisk (*) and colon (:) from the allele_name
    allele_name = allele_name.replace('*', '').replace(':', '')

    cmd = [
        EXECUTABLE_FULL_PATH, '-a', allele_name, '-l', str(binding_length), '-p', peptide_file_path
    ]

    process = Popen(cmd, stdout=PIPE)
    # stderr is not piped, so only stdout carries data here.
    stdoutdata, _ = process.communicate()

    # communicate() returns bytes; decode before the str-based filtering below,
    # otherwise startswith('#') raises TypeError on Python 3.
    output = stdoutdata.decode().splitlines()

    # Keep only the result rows: skip comment lines, blank lines and column
    # headers (first field not numeric).
    scores = []
    for line in output:
        fields = line.split()
        if line.startswith('#') or not fields or not fields[0].isdigit():
            continue
        scores.append(fields[12])

    return scores

def pep_score_predict_from_peptide_file(allele_name, binding_length, peptide_file_path):
    """ @brief Runs netMHC in its peptide-file mode ('-p') and pairs two fields of every result row.
        The peptide file holds one peptide per line; this mode is used for efficient calculation
        of score distributions used in generating percentiles from raw scores.

        @param allele_name MHC allele name; '*' and ':' are stripped before use.
        @param binding_length Peptide length, passed to netMHC as a string.
        @param peptide_file_path Path of the peptide file given to '-p'.
        @return List of (field 3, field 13) string tuples, one per result row, in
                output order. Field 3 is presumably the peptide and field 13 its
                score -- verify against the netMHC output layout.
    """
    # netMHC expects allele names without the asterisk (*) and colon (:).
    netmhc_allele = allele_name.replace('*', '').replace(':', '')

    command = [
        EXECUTABLE_FULL_PATH, '-a', netmhc_allele, '-l', str(binding_length), '-p', peptide_file_path
    ]

    child = Popen(command, stdout=PIPE)
    raw_stdout = child.communicate()[0]

    # Walk the decoded output, dropping comment lines, blank lines and any
    # line whose first field is not a number (e.g. column headers).
    pairs = []
    for line in raw_stdout.decode().splitlines():
        fields = line.split()
        if line.startswith('#') or not fields or not fields[0].isdigit():
            continue
        pairs.append((fields[2], fields[12]))

    return pairs

# Peptide lengths listed as allowed for this predictor.
allowed_binding_lengths = (8, 9, 10, 11, 12, 13, 14, 15)

# Retrieved using the -listMHC option of the executable.
# Names are written without '*' and ':' -- the same form the predict functions
# in this module produce before passing an allele name to netMHC.
allowed_allele_names = [
    'BoLA-AW10',
    'BoLA-D18.4',
    'BoLA-HD6',
    'BoLA-JSP.1',
    'BoLA-T2C',
    'BoLA-T2a',
    'BoLA-T2b',
    'H-2-Db',
    'H-2-Dd',
    'H-2-Kb',
    'H-2-Kd',
    'H-2-Kk',
    'H-2-Ld',
    'HLA-A0101',
    'HLA-A0201',
    'HLA-A0202',
    'HLA-A0203',
    'HLA-A0205',
    'HLA-A0206',
    'HLA-A0207',
    'HLA-A0211',
    'HLA-A0212',
    'HLA-A0216',
    'HLA-A0217',
    'HLA-A0219',
    'HLA-A0250',
    'HLA-A0301',
    'HLA-A0302',
    'HLA-A0319',
    'HLA-A1101',
    'HLA-A2301',
    'HLA-A2402',
    'HLA-A2403',
    'HLA-A2501',
    'HLA-A2601',
    'HLA-A2602',
    'HLA-A2603',
    'HLA-A2902',
    'HLA-A3001',
    'HLA-A3002',
    'HLA-A3101',
    'HLA-A3201',
    'HLA-A3207',
    'HLA-A3215',
    'HLA-A3301',
    'HLA-A6601',
    'HLA-A6801',
    'HLA-A6802',
    'HLA-A6823',
    'HLA-A6901',
    'HLA-A8001',
    'HLA-B0702',
    'HLA-B0801',
    'HLA-B0802',
    'HLA-B0803',
    'HLA-B1401',
    'HLA-B1402',
    'HLA-B1501',
    'HLA-B1502',
    'HLA-B1503',
    'HLA-B1509',
    'HLA-B1517',
    'HLA-B1801',
    'HLA-B2705',
    'HLA-B2720',
    'HLA-B3501',
    'HLA-B3503',
    'HLA-B3701',
    'HLA-B3801',
    'HLA-B3901',
    'HLA-B4001',
    'HLA-B4002',
    'HLA-B4013',
    'HLA-B4201',
    'HLA-B4402',
    'HLA-B4403',
    'HLA-B4501',
    'HLA-B4506',
    'HLA-B4601',
    'HLA-B4801',
    'HLA-B5101',
    'HLA-B5301',
    'HLA-B5401',
    'HLA-B5701',
    'HLA-B5703',
    'HLA-B5801',
    'HLA-B5802',
    'HLA-B7301',
    'HLA-B8101',
    'HLA-B8301',
    'HLA-C0303',
    'HLA-C0401',
    'HLA-C0501',
    'HLA-C0602',
    'HLA-C0701',
    'HLA-C0702',
    'HLA-C0802',
    'HLA-C1203',
    'HLA-C1402',
    'HLA-C1502',
    'HLA-E0101',
    'HLA-E0103',
    'Mamu-A01',
    'Mamu-A02',
    'Mamu-A07',
    'Mamu-A11',
    'Mamu-A20102',
    'Mamu-A2201',
    'Mamu-A2601',
    'Mamu-A70103',
    'Mamu-B01',
    'Mamu-B03',
    'Mamu-B08',
    'Mamu-B1001',
    'Mamu-B17',
    'Mamu-B3901',
    'Mamu-B52',
    'Mamu-B6601',
    'Mamu-B8301',
    'Mamu-B8701',
    'Patr-A0101',
    'Patr-A0301',
    'Patr-A0401',
    'Patr-A0701',
    'Patr-A0901',
    'Patr-B0101',
    'Patr-B1301',
    'Patr-B2401',
    'SLA-10401',
    'SLA-10701',
    'SLA-20401',
    'SLA-30401'
]
