import logging
import pickle

import pkg_resources

# Package whose resources include the pickled lookup data.
package_name = 'mhcii_tepitope_predictor'
# File name of the pickle read by TepitopePredictorDataManager.
pickle_filename = 'tepitope_predictor_data_2016-01-29.p'
# Filesystem path of that pickle, resolved by pkg_resources from the package
# name and the resource file name above.
pickle_filepath = pkg_resources.resource_filename(package_name, pickle_filename)

class TepitopePredictorDataManager(object):
    """Loads the pickled lookup data once and serves it per allele.

    The unpickled object is kept on ``self.data_dict`` and is indexed by
    allele in ``get_analytical_data``.
    """

    def __init__(self):
        # The path is the module-level ``pickle_filepath``; the file is opened
        # in binary mode as pickle requires.
        with open(pickle_filepath, 'rb') as handle:
            self.data_dict = pickle.load(handle)

    def get_analytical_data(self, allele):
        """Return the lookup dictionary stored for ``allele``.

        The returned dictionary looks similar to::

            {
              'A': (-999.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
              'C': (-999.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
              'D': (-999.0, -1.3, -1.3, -2.4, 0.0, -2.7, -2.0, 0.0, -1.9),
              ...
              'V': (-1.0, 2.1, 0.5, -0.05, 0.0, -1.1, 0.7, 0.0, 0.3),
              'W': (0.0, -0.1, 0.0, -1.8, 0.0, -2.4, -0.08, 0.0, -1.4),
              'Y': (0.0, 0.9, 0.8, -1.1, 0.0, -2.0, 0.5, 0.0, -0.9),
            }

        Keys are amino acids; each tuple holds the binding values by
        position. The lookup is a plain subscript on ``self.data_dict``, so
        an allele that is not present is not handled here and the lookup
        error propagates to the caller.
        """
        per_allele = self.data_dict
        return per_allele[allele]

# Shared module-level instance used by the prediction code below. Constructing
# it opens and unpickles the data file, so that I/O happens at import time.
data_manager = TepitopePredictorDataManager()

logger = logging.getLogger(__name__)

# Lowest score a single core may contribute; lower sums are clamped up to it.
_CORE_SCORE_FLOOR = -12.5
# Starting "no core scored yet" value. It sits strictly below
# _CORE_SCORE_FLOOR so the first core that is scored always replaces it.
_UNSCORED = -13


def _best_core(sequence, start, matrix, binding_length, coreseq_len):
    """Return ``(core, score)`` for the best core inside one binding window.

    The window is ``sequence[start:start + binding_length]``. Every
    ``coreseq_len``-wide slice of it is scored by summing
    ``matrix[amino_acid][position]``; sums below ``_CORE_SCORE_FLOOR`` are
    clamped. The first slice with the highest score wins. If the window is
    narrower than ``coreseq_len`` nothing is scored and ``("", _UNSCORED)``
    is returned.

    Raises:
        KeyError: a residue of the sequence is not a key of ``matrix``.
    """
    best_core = ""
    best_score = _UNSCORED
    last_offset = start + binding_length - coreseq_len
    for offset in range(start, last_offset + 1):
        candidate = sequence[offset:offset + coreseq_len]
        total = 0
        for pos, amino_acid in enumerate(candidate):
            try:
                total += matrix[amino_acid][pos]
            except KeyError:
                logger.error('Invalid character "%s" in sequence "%s".',
                             amino_acid, sequence)
                raise
        total = max(total, _CORE_SCORE_FLOOR)
        # Strict comparison: on ties the earliest core is kept.
        if total > best_score:
            best_core, best_score = candidate, total
    return (best_core, best_score)


def single_prediction_tepitope(sequence_list, allele_length_2tuple_list, coreseq_len=9):
    """Score every binding window of every sequence against every allele.

    Args:
        sequence_list: iterable of sequences (indexable and sliceable, e.g.
            ``str``) whose items are keys of the allele matrices.
        allele_length_2tuple_list: iterable of ``(allele_name,
            binding_length)`` pairs. It is iterated once per sequence, so it
            must be re-iterable (a list or tuple, not a generator).
        coreseq_len: width of the binding core; must equal the width of the
            rows of the allele matrix.

    Returns:
        A dict mapping ``(sequence, allele_name, binding_length)`` to a tuple
        holding one ``(core, score)`` pair per window start position, in
        order. ``core`` is the best-scoring ``coreseq_len``-wide slice of that
        window and ``score`` its clamped score.

        Sequences shorter than ``binding_length`` are skipped: no entry is
        created for them.
        TODO (JY): decide whether short sequences should be reported instead.

    Raises:
        ValueError: the first row of an allele matrix (or an empty matrix)
            does not have ``coreseq_len`` entries.
        KeyError: a sequence contains a character that is not in the matrix.
        Whatever ``data_manager.get_analytical_data`` raises for an allele it
        cannot resolve propagates unchanged.
    """
    predictions = {}
    for sequence in sequence_list:
        for allele_name, binding_length in allele_length_2tuple_list:
            matrix = data_manager.get_analytical_data(allele_name)
            # Only the first row is inspected; an empty matrix counts as a
            # width mismatch rather than an IndexError.
            first_row = next(iter(matrix.values()), ())
            if len(first_row) != coreseq_len:
                msg = 'The width of the tepitope lookup matrix needs to match the'\
                      ' core_sequence_length'
                logger.critical(msg)
                raise ValueError(msg)
            if len(sequence) < binding_length:
                continue
            window_count = len(sequence) - binding_length + 1
            key = (sequence, allele_name, binding_length)
            predictions[key] = tuple(
                _best_core(sequence, nterm, matrix, binding_length, coreseq_len)
                for nterm in range(window_count)
            )
    return predictions

