import os
import pkg_resources
from collections import namedtuple, defaultdict
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile
import logging

logger = logging.getLogger(__name__)

# Locate the ICERFIRE launcher script shipped as a resource of the
# 'icerfire_1_0_executable' package.
_executable_filename = \
    pkg_resources.resource_filename('icerfire_1_0_executable', 'bashscripts/ICERFIRE.sh')
# NOTE(review): os.path.join discards the earlier components when a later one
# is absolute, so if resource_filename returns an absolute path (presumably it
# does -- confirm), the dirname(__file__) prefix has no effect here.
EXECUTABLE_PATH = os.path.join(os.path.dirname(__file__), _executable_filename)
# Runs at import time: every import of this module writes the path to stdout.
print(EXECUTABLE_PATH)

# HLA alleles accepted by generate_sequences when alleles are passed
# separately from the peptide rows. Names are written without '*' and ':'
# (e.g. HLA-A*02:01 -> HLA-A0201). Grouped by locus; order is significant
# only in that it mirrors the original listing.
available_allele_list = [
    # HLA-A
    'HLA-A0101', 'HLA-A0201', 'HLA-A0202', 'HLA-A0203', 'HLA-A0205',
    'HLA-A0206', 'HLA-A0210', 'HLA-A0211', 'HLA-A0224', 'HLA-A0301',
    'HLA-A0302', 'HLA-A1101', 'HLA-A1102', 'HLA-A2402', 'HLA-A2501',
    'HLA-A2601', 'HLA-A2902', 'HLA-A3001', 'HLA-A3002', 'HLA-A3101',
    'HLA-A3301', 'HLA-A6801', 'HLA-A6802', 'HLA-A6901', 'HLA-A8001',
    # HLA-B
    'HLA-B0702', 'HLA-B0801', 'HLA-B1302', 'HLA-B1501', 'HLA-B1801',
    'HLA-B2702', 'HLA-B2705', 'HLA-B3501', 'HLA-B3503', 'HLA-B3701',
    'HLA-B3704', 'HLA-B3801', 'HLA-B3901', 'HLA-B3906', 'HLA-B4001',
    'HLA-B4002', 'HLA-B4102', 'HLA-B4402', 'HLA-B4403', 'HLA-B4408',
    'HLA-B4901', 'HLA-B5101', 'HLA-B5201', 'HLA-B5401', 'HLA-B5601',
    'HLA-B5701',
    # HLA-C
    'HLA-C0102', 'HLA-C0303', 'HLA-C0304', 'HLA-C0401', 'HLA-C0501',
    'HLA-C0602', 'HLA-C0701', 'HLA-C0702', 'HLA-C0802', 'HLA-C1202',
    'HLA-C1203', 'HLA-C1402', 'HLA-C1403', 'HLA-C1502',
]

def generate_sequences(alleles, input_sequence_text):
    """Convert PVC-style peptide rows into ICERFIRE input rows.

    Each non-blank line of ``input_sequence_text`` is a comma-separated row:
    ``<wild-type peptide>, <mutant peptide>, [HLA allele], [expression value]``.

    * If the text contains the substring ``'HLA'``, every row is assumed to
      carry its own allele; ``'*'`` and ``':'`` are removed from the text and
      ``alleles`` is ignored.
    * Otherwise ``alleles`` (a comma-separated string) is normalised the same
      way and every row is repeated once per allele found in
      ``available_allele_list``, with the allele inserted as the third field.
      Unknown alleles are skipped with a warning printed to stdout.

    The first two fields of every output row are swapped (mutant first),
    because PVC input and ICERFIRE input order them differently.

    Args:
        alleles: Comma-separated allele names; only read when the text does
            not contain ``'HLA'``.
        input_sequence_text: Newline-separated rows as described above.
            Blank lines, ``\\r\\n`` line endings and whitespace around fields
            are tolerated.

    Returns:
        A list of rows, each a list of field strings.

    Raises:
        ValueError: If a row has fewer than two fields, or its fourth field
            (the expression value) is not a plain non-negative decimal number.
    """
    format_error = 'Format issue: each row of input_sequence_text should be: <wild-type peptide>, <mutant peptide>, [HLA allele (optinal)], [expression value (optinal)]'

    contains_hla = 'HLA' in input_sequence_text
    if contains_hla:
        input_sequence_text = input_sequence_text.replace('*', '').replace(':', '')

    # splitlines() handles both '\n' and '\r\n'; blank lines (including the
    # one produced by a trailing newline) are not rows and are dropped.
    rows = []
    for line in input_sequence_text.splitlines():
        if not line.strip():
            continue
        fields = [field.strip() for field in line.split(',')]
        if len(fields) < 2:
            # Both peptides are mandatory; fail with the format message
            # instead of an IndexError at the swap below.
            raise ValueError(format_error)
        rows.append(fields)

    if not contains_hla:
        available = set(available_allele_list)
        expanded = []
        for allele in alleles.replace('*', '').replace(':', '').split(','):
            allele = allele.strip()
            if allele not in available:
                print('Warning: allele %s is not in the available allele list and will be skipped.' % allele)
                continue
            # Build a fresh row per allele so rows never share a list object.
            expanded.extend(row[:2] + [allele] + row[2:] for row in rows)
        rows = expanded

    for row in rows:
        # swapping first two elements wild-type peptide and mutant peptide
        # because of the difference format between PVC input and icerfire input
        row[0], row[1] = row[1], row[0]
        # check input format: the optional 4th field must look like a number
        if len(row) > 3 and not row[3].replace('.', '', 1).isdigit():
            raise ValueError(format_error)
    return rows

def icerfire_prediction(params, output_prefix=None, output_format=None, assume_valid_flag=None):
    """Run the ICERFIRE shell script on the requested peptides.

    The rows produced by ``generate_sequences`` are written to a temporary
    comma-separated file, ``bashscripts/ICERFIRE.sh`` (located next to this
    module) is invoked on it, and the path of the result CSV is parsed from
    the script's stdout. The temporary input file is always removed.

    Args:
        params: Mapping with the keys ``'alleles'`` and
            ``'input_sequence_text'``, passed to ``generate_sequences``.
        output_prefix: Unused; accepted for interface compatibility.
        output_format: Unused; accepted for interface compatibility.
        assume_valid_flag: Unused; accepted for interface compatibility.

    Returns:
        The path reported by the script on its
        ``'final csv result saved to: '`` line (the last such line wins).

    Raises:
        ValueError: Propagated from ``generate_sequences`` on malformed input.
        RuntimeError: If the script's stdout never reports a result path.
    """
    sequences = generate_sequences(params['alleles'], params['input_sequence_text'])

    src_dir = os.path.abspath(os.path.dirname(__file__))
    icerfire_executable_path = os.path.join(src_dir, 'bashscripts', 'ICERFIRE.sh')
    result_marker = 'final csv result saved to: '

    # delete=False: the file must survive being closed so the script can
    # reopen it by name; it is removed in the ``finally`` below.
    tmp = NamedTemporaryFile(mode='w', delete=False)
    try:
        # Close the handle even if writing fails, before the script reads it.
        with tmp:
            tmp.writelines(','.join(seq) + '\n' for seq in sequences)

        # NOTE(review): the meaning of '-a true' / '-u false' is defined by
        # ICERFIRE.sh and is not visible from this module.
        icerfire_cmd = [icerfire_executable_path, '-f',  tmp.name, '-a', 'true', '-u', 'false']
        print(' '.join(icerfire_cmd))
        logger.debug(' '.join(icerfire_cmd))

        # Only stdout is captured; the script's stderr goes to ours.
        process = Popen(icerfire_cmd, stdout=PIPE)
        stdoutdata, _ = process.communicate()
        stdoutdata = stdoutdata.decode()

        final_csv_path = None
        for row in stdoutdata.splitlines():
            if row.startswith(result_marker):
                # Keep the last match; drop stray trailing whitespace.
                final_csv_path = row[len(result_marker):].rstrip()

        print('stdout %s' % stdoutdata)
        logger.debug('Raw output:\n{}'.format(stdoutdata))

        if final_csv_path is None:
            # Previously this surfaced as an UnboundLocalError with no context.
            raise RuntimeError(
                'ICERFIRE did not report a result file (exit code %s); stdout was:\n%s'
                % (process.returncode, stdoutdata))

        print('prediction done')
        return final_csv_path
    finally:
        os.unlink(tmp.name)
