#! /usr/bin/python
import json
import os
import subprocess
from multiprocessing.spawn import get_preparation_data

from PercentilesCalculators import MHCIPercentilesCalculator

# Name of the Docker image to run; taken from the MHCNP_DOCKER_IMG environment
# variable when it is set, otherwise the default below is used.
MHCNP_DOCKER_IMG = os.environ.get("MHCNP_DOCKER_IMG", "harbor.lji.org/iedb-public/mhc-np")

def add_percentile(result_data):
    """Attach percentile ranks to the MHC-NP peptide tables in ``result_data``.

    Every entry of ``result_data['results']`` whose ``type`` is
    ``'peptide_table'`` and whose ``method`` contains ``'mhcnp'`` is rewritten
    in place:

    * ``method`` is set to ``"binding.mhcnp"``;
    * the last column name becomes ``'score'``, a ``"percentile"`` column is
      appended and the first column is dropped;
    * every row gets its percentile appended and its first element dropped.

    Rows are read positionally before they are trimmed: ``row[1]`` is the
    peptide (only its length is used), ``row[2]`` the allele and ``row[-1]``
    the score.

    Args:
        result_data: Parsed result dict. A missing or empty ``'results'``
            entry leaves it untouched.

    Returns:
        The same ``result_data`` object, modified in place.

    Raises:
        KeyError: If ``score_distributions`` has no entry for a row's
            ``('mhcnp', allele, peptide_length)`` key.
    """
    # Kept as a function-level import, as in the original.
    # NOTE(review): presumably this defers loading the distribution data
    # until it is needed - confirm before moving it to module level.
    from mhcnp_percentile_data import score_distributions
    percentiles_calculator = MHCIPercentilesCalculator(score_distributions)
    method_name = 'mhcnp'

    # Distributions differ per (method, allele, peptide length), so the cache
    # must use that full key. Caching by allele alone would reuse the
    # distribution of the first peptide length seen for every other length.
    distribution_cache = {}

    for result in result_data.get('results') or []:
        if result['type'] != 'peptide_table' or 'mhcnp' not in result['method']:
            continue

        result['method'] = "binding.mhcnp"
        result['table_columns'][-1] = 'score'
        result['table_columns'].append("percentile")
        result['table_columns'].pop(0)

        for row in result['table_data']:
            # Read every positional field before the row is modified below.
            allele = row[2]
            peptide_length = len(row[1])
            score = row[-1]

            data_key = (method_name, allele, peptide_length)
            if data_key not in distribution_cache:
                distribution_cache[data_key] = score_distributions[data_key]

            rank = percentiles_calculator.percentile_scores(
                score, distribution_cache[data_key], method_name)
            row.append(rank)
            row.pop(0)
    return result_data


def add_percentile_to_file(result_path):
    """Rewrite the JSON file at ``result_path`` with percentiles added.

    The file is opened once for reading and writing: its content is parsed,
    passed through ``add_percentile``, written back from the start with a
    two-space indent, and any leftover bytes from the old content are cut off.

    Args:
        result_path: Path of an existing JSON result file.
    """
    with open(result_path, "r+") as handle:
        updated = add_percentile(json.load(handle))
        # Overwrite from the beginning, then drop whatever remains of the
        # previous content past the end of the new document.
        handle.seek(0)
        json.dump(updated, handle, indent=2)
        handle.truncate()


def predict(**kwargs):
    """Run MHC-NP in Docker for every allele/length pair and add percentiles.

    The directory containing the sequence file is mounted at
    ``/src/mhcnp/input`` and the directory of the output file at
    ``/src/mhcnp/output``; ``run_mhcnp.py`` is then run inside the
    ``MHCNP_DOCKER_IMG`` container as the current user id. After the
    container exits, the JSON file at ``output_path`` is rewritten by
    ``add_percentile_to_file``.

    Keyword Args:
        lengths: Comma-separated peptide lengths.
        input_allele: Comma-separated allele names.
        fname: Path of the input sequence file.
        output_path: Path of the JSON file the container is asked to write.
        seq_file_type: ``'peptides'`` (passed as ``-p``) or ``'fasta'``
            (passed as ``-f``).

    Returns:
        The text the container wrote to standard output.

    Raises:
        ValueError: If ``seq_file_type`` is not one of the accepted values,
            if a required argument is missing or empty, or if no allele or
            length remains after blank entries are dropped.
        OSError: If the ``docker`` executable cannot be started.
    """
    seq_file_type = kwargs.get('seq_file_type')
    if seq_file_type == 'peptides':
        seq_file_type_para = '-p'
    elif seq_file_type == 'fasta':
        seq_file_type_para = '-f'
    else:
        raise ValueError('can not accept seq_file_type: %s' % seq_file_type)

    # Fail early with a clear message instead of an AttributeError/TypeError
    # from calling str/path methods on None further down.
    required = ('lengths', 'input_allele', 'fname', 'output_path')
    missing = [name for name in required if not kwargs.get(name)]
    if missing:
        raise ValueError('missing required argument(s): %s' % ', '.join(missing))

    # Surrounding whitespace and blank entries (e.g. from a trailing comma)
    # are discarded so the -a and -l lists below always stay the same length.
    allele_names = [item.strip() for item in kwargs['input_allele'].split(',') if item.strip()]
    length_values = [item.strip() for item in kwargs['lengths'].split(',') if item.strip()]
    if not allele_names or not length_values:
        raise ValueError('at least one allele and one length are required')

    # Every allele is paired with every length: position i of allele_args
    # goes with position i of length_args.
    allele_args = [allele for allele in allele_names for _ in length_values]
    length_args = [length for _ in allele_names for length in length_values]

    sequence_dir_path, sequence_filename = os.path.split(os.path.abspath(kwargs['fname']))
    output_dir_path, output_filename = os.path.split(os.path.abspath(kwargs['output_path']))

    # An argument list (no shell) keeps paths with spaces intact and prevents
    # characters such as '*' in allele names from being expanded or
    # interpreted by a shell.
    cmd = [
        "docker", "run",
        "-u", str(os.getuid()),
        "-v", f"{sequence_dir_path}:/src/mhcnp/input",
        "-v", f"{output_dir_path}:/src/mhcnp/output",
        MHCNP_DOCKER_IMG,
        "python", "run_mhcnp.py",
        seq_file_type_para, f"/src/mhcnp/input/{sequence_filename}",
        "-a", *allele_args,
        "-l", *length_args,
        "-j", f"/src/mhcnp/output/{output_filename}",
    ]

    # Only stdout is captured; stderr stays attached to the caller's stderr.
    # The exit status is deliberately not checked, matching the previous
    # behaviour: a failed run surfaces when the output file is read below.
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, universal_newlines=True)

    add_percentile_to_file(os.path.join(output_dir_path, output_filename))
    return completed.stdout
    
if __name__ == '__main__':
    # Placeholder for an ad-hoc test script; running this module directly
    # currently does nothing.
    pass

