import sys
import re
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm import tqdm
# Put the directory one level above this script's folder on the import path
# (at position 1, i.e. right after the first sys.path entry).
PROJECT_DIR = str(Path(__file__).resolve().parents[1])
sys.path.insert(1, PROJECT_DIR)

# Input/output files live in the "data" directory that sits next to this
# script's folder.
PARENT_DIR = Path(__file__).parent
DATA_DIR = PARENT_DIR.parent.joinpath("data")
ORIG_TM_FILE = DATA_DIR.joinpath("Tools_MRO_mapping.xlsx")
TOOLS_MAPPING_FILE = DATA_DIR.joinpath("tools-mapping.tsv")
# MRO_MOLECULES_FILE = DATA_DIR / "mro_molecules.tsv"


def main():
    """Append the 'comblib' entries of the MRO workbook to the tools-mapping table.

    Reads ``ORIG_TM_FILE`` (Excel) and keeps only the rows whose ``tool``
    column equals ``'comblib'``. For every such row a six-field record is
    built from fixed values plus the row's ``term`` and ``MRO ID`` cells.
    The records are appended to the table read from ``TOOLS_MAPPING_FILE``
    and the result is written back to that same file as a tab-separated
    file without an index column.

    The new records are positional: they are assigned to the existing
    columns of the tools-mapping table in order, so that table must have
    exactly six columns.
    """
    original_tm_df = pd.read_excel(ORIG_TM_FILE, engine='openpyxl', index_col=False)

    # Keep only the 'comblib' rows (16 alleles when this was written, per the
    # original author's note).
    comblib_df = original_tm_df[original_tm_df['tool'] == 'comblib']

    tools_mapping_df = pd.read_csv(TOOLS_MAPPING_FILE, skipinitialspace=True, sep='\t')

    # Same value for every appended record; presumably the supported peptide
    # lengths -- confirm against the tools-mapping header.
    lengths = '11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30'

    new_rows = [
        ['mhcii', 'comblib', '1.0', tools_label, mroid, lengths]
        for tools_label, mroid in zip(comblib_df['term'], comblib_df['MRO ID'])
    ]

    # Concatenate once instead of once per row: each pd.concat copies the
    # whole frame, so growing it inside the loop is quadratic.
    if new_rows:
        additions_df = pd.DataFrame(new_rows, columns=tools_mapping_df.columns)
        tools_mapping_df = pd.concat([tools_mapping_df, additions_df], ignore_index=True)

    # Overwrites the file that was read above.
    tools_mapping_df.to_csv(TOOLS_MAPPING_FILE, sep='\t', index=False)


# Run the update only when executed as a script, not when imported.
if __name__ == "__main__":
    main()