import unittest
import os
import sys
import subprocess
import pandas as pd
from pathlib import Path
# Directory the report is written to. Built as "<this file's directory>/../data",
# i.e. the "data" directory next to this file's parent directory.
DATA_DIR = str(Path(__file__).resolve().parents[0]) + "/../data"
# Machine-specific locations of the netMHCpan executable and of the peptide
# file passed to it with "-p".
NETMHCPAN_PATH = "/home/hkim/IEDB/executable/netmhcpan-4.1-executable/netmhcpan_4_1_executable/netMHCpan"
TEST_PATH="/home/hkim/IEDB/executable/netmhcpan-4.1-executable/netmhcpan_4_1_executable/test/test.pep"
# Parent of this file's directory; put on sys.path so project modules can be
# imported by the statements that follow.
PROJECT_DIR = str(Path(__file__).resolve().parents[1])
sys.path.insert(1, PROJECT_DIR)
# NOTE(review): this adds the path of the executable itself (not a package
# directory) to sys.path; kept for compatibility - confirm whether it is needed.
sys.path.insert(1, NETMHCPAN_PATH)
# os.getenv() returns a string, so values such as "0" or "false" used to count
# as "skip". Parse the flag explicitly: unset/empty/"0"/"false"/"no"/"off"
# mean "do not skip"; any other value means "skip".
SKIP_GITLAB_CI = os.getenv('SKIP_GITLAB_CI', '').strip().lower() not in ('', '0', 'false', 'no', 'off')
from allele_validator import Allele_Validator


@unittest.skipIf(SKIP_GITLAB_CI, "CI Can't seem to find location of netmhcpan_4_1_executable")
class NetMHCpanAllelesValidityTest(unittest.TestCase):
    """Check the allele names netMHCpan reports against the allele validator.

    Every ``netmhcpan`` row of the tools-mapping file is run through the
    netMHCpan executable. The allele names found in its output are looked up
    with ``Allele_Validator`` and the findings are written to a tab-separated
    report file.

    The original note on this class read 'test cases for netmhcpan 4.0 and
    netmhcpan_el 4.0'.
    NOTE(review): the executable path configured in this module names
    netMHCpan 4.1 - confirm which version is intended.
    """

    # Index of the allele field in a whitespace-split netMHCpan result row.
    ALLELE_COL = 1
    # Sentinel returned by get_all_output_alleles() when no result is available.
    NO_RESULT = (0, 0, 0)
    # Loaded when the class body executes; not referenced by the methods below.
    MHC_ALLELES_DF = pd.read_table('%s/data/mhc_alleles.tsv' %(PROJECT_DIR))
    TOOLS_MAPPING_FILE_PATH = '%s/data/tools-mapping.tsv' %(PROJECT_DIR)
    VALIDATOR = Allele_Validator()
    # Column order of the report; must match the rows built in generate_report().
    REPORT_COLUMNS = [
        "iedb_allele",
        "input_allele (tool_label)",
        "output_allele",
        "closest_allele",
        "input_in_allele_db",
        "input_found_as",
        "input_allele_mapped_to_iedb_label",
        "output_in_allele_db",
        "output_found_as",
        "output_allele_mapped_to_iedb_label",
        "closest_in_allele_db",
        "closest_found_as",
        "closest_allele_mapped_to_iedb_label",
    ]

    def test_base_case(self):
        """Run netMHCpan for every netmhcpan tool label and write the report."""
        # Dataframe containing netmhcpan entries
        tools_mapping_df = pd.read_csv(self.TOOLS_MAPPING_FILE_PATH, skipinitialspace=True, sep='\t')
        tools_mapping_df = tools_mapping_df[tools_mapping_df['Tool'] == 'netmhcpan']
        label_pairs = zip(
            tools_mapping_df['Tool Label'].tolist(),
            tools_mapping_df['IEDB Label'].tolist(),
        )

        # (tool label, IEDB label) pairs for which netMHCpan gave no result.
        invalid_alleles = []
        # Every allele that produced a result, keyed by the input tool label.
        complete_alleles_dict = {}
        # Subset whose closest allele is not resolved by the synonym lookup.
        invalid_neighbor_alleles_dict = {}

        for tm_tools_label, tm_iedb_label in label_pairs:
            print("IEDB_Label: %s\nTool Label: %s" %(tm_iedb_label, tm_tools_label))

            input_allele, closest_allele, output_allele = self.get_all_output_alleles(tm_tools_label)

            if (input_allele, closest_allele, output_allele) == self.NO_RESULT:
                # According to the original author's note, nine alleles are
                # expected to produce no output:
                #   BoLA-NC1:00101, BoLA-NC1:00201, BoLA-NC1:00301,
                #   BoLA-NC1:00401, BoLA-NC2:00101, BoLA-NC2:00102,
                #   BoLA-NC3:00101, BoLA-NC4:00101, BoLA-NC4:00201
                invalid_alleles.append((tm_tools_label, tm_iedb_label))
                continue

            record = {
                "iedb_allele": tm_iedb_label,
                "closest_allele": closest_allele,
                "output_allele": output_allele
            }

            if not self.VALIDATOR.convert_synonym_to_iedblabel(synonym=closest_allele):
                invalid_neighbor_alleles_dict[input_allele] = dict(record)

            complete_alleles_dict[input_allele] = record

        # Surface the diagnostics that are collected above but are not part of
        # the report itself.
        print("Alleles without netMHCpan output: %d" % len(invalid_alleles))
        print("Alleles whose closest allele is not a known synonym: %d" % len(invalid_neighbor_alleles_dict))

        # Pass invalid_neighbor_alleles_dict instead to report only the alleles
        # whose closest allele could not be resolved as a synonym.
        self.generate_report(complete_alleles_dict)

        # Unusable netMHCpan output is mapped to NO_RESULT rather than raising,
        # so make a run in which nothing could be parsed fail visibly.
        self.assertTrue(
            complete_alleles_dict,
            "netMHCpan produced no usable output for any netmhcpan tool label")

    def generate_report(self, allele_dict):
        """Write the validator lookup results for ``allele_dict`` to the report file.

        ``allele_dict`` maps an input allele (a netmhcpan tool label) to a dict
        with the keys "iedb_allele", "closest_allele" and "output_allele".
        One row per entry is written to "missing_closest_alleles.csv" under
        DATA_DIR; the file is tab-separated despite its extension.
        """
        rows = []
        for input_allele, info in allele_dict.items():
            closest_allele = info["closest_allele"]
            output_allele = info["output_allele"]
            rows.append([
                info["iedb_allele"],
                input_allele,
                output_allele,
                closest_allele,
                # The input allele is only checked as tool label and synonym,
                # never as an IEDB label.
                *self._classify_allele(input_allele, check_iedb_label=False),
                *self._classify_allele(output_allele),
                *self._classify_allele(closest_allele),
            ])

        report_df = pd.DataFrame(rows, columns=self.REPORT_COLUMNS)

        # Write Dataframe to CSV format
        report_df.to_csv('{}/{}'.format(DATA_DIR, "missing_closest_alleles.csv"), sep='\t', index=False)

    def _classify_allele(self, allele, check_iedb_label=True):
        """Return ``(is_in_db, found_as, mapped_label)`` for one allele name.

        The name is tried as a netmhcpan tool label, then (unless
        ``check_iedb_label`` is false) as an IEDB label, then as a synonym.
        ``is_in_db`` is 1 when any lookup succeeds and 0 otherwise, in which
        case ``found_as`` and ``mapped_label`` are both 'NA'.
        """
        iedb_label = self.VALIDATOR.convert_methodlabel_to_iedblabel(allele, method="netmhcpan")
        if iedb_label:
            return 1, 'tool_label', iedb_label

        if check_iedb_label and self.VALIDATOR.convert_iedblabel_to_methodlabel(allele, method="netmhcpan"):
            # The name already is an IEDB label, so it maps to itself.
            return 1, 'iedb_label', allele

        synonym_label = self.VALIDATOR.convert_synonym_to_iedblabel(allele)
        if synonym_label:
            return 1, 'synonym', synonym_label

        return 0, 'NA', 'NA'

    def get_all_output_alleles(self, allele):
        """Run netMHCpan for ``allele`` and extract the allele names it reports.

        Returns ``(allele, closest_allele, output_allele)``, where
        ``closest_allele`` is the last token of the first informational line
        (minus its final character) and ``output_allele`` is the ALLELE_COL
        field of the first result row. Returns ``(0, 0, 0)`` when the output
        starts with an error line or does not contain both of those lines.
        """
        completed = subprocess.run(
            [NETMHCPAN_PATH, "-p", TEST_PATH, "-a", allele],
            capture_output=True, text=True)

        # Keep the first two lines that are not blank, not a '#' comment, not a
        # '-' separator and not the "Pos ..." table header.
        essential_info = []
        for line in completed.stdout.split('\n'):
            stripped = line.strip()
            if not stripped or line.startswith(('#', '-')) or stripped.startswith("Pos"):
                continue
            essential_info.append(line)
            if len(essential_info) == 2:
                break

        # Exit early if there are no results: either netMHCpan reported an
        # error (e.g. invalid allele) or the expected two lines are missing.
        # The latter previously raised IndexError.
        if len(essential_info) < 2 or essential_info[0].startswith("Error"):
            return self.NO_RESULT

        # Item 1: text containing the original allele and its closest neighbor.
        # Item 2: first row of the actual result table.
        info_header, first_row = essential_info

        # Drop the final character of the last token (the original author
        # describes it as trailing the closest-neighbor allele name).
        closest_allele = info_header.split(' ')[-1][:-1]

        fields = first_row.split()
        if len(fields) <= self.ALLELE_COL:
            return self.NO_RESULT
        output_allele = fields[self.ALLELE_COL]

        return (allele, closest_allele, output_allele)

if __name__ == "__main__":
    # Run the tests in this module when the file is executed as a script.
    unittest.main()