import argparse
import json
import csv
import os
import sys
import shlex
import shutil
import subprocess
import tempfile
import textwrap
import preprocess
import postprocess
import pandas as pd
import validators
from io import StringIO
from enum import Enum
from PhbrArgumentParser import PhbrArgumentParser
from pathlib import Path

from paths import APP_ROOT

# Add the nxg-tools directory to Python path
sys.path.insert(0, str(APP_ROOT / 'libs' / 'nxg-tools'))
from nxg_common import nxg_common as common

from urllib.error import URLError
from typing import Dict, List, Union, Optional, Any, Tuple
from itertools import product

class TCellClass(str, Enum):
    """MHC class selector; members are also plain strings ('i' / 'ii')."""

    # MHC class I
    I = "i"
    # MHC class II
    II = "ii"



def detect_file_type(filepath: str) -> str:
    """
    Classify a file as "json" or "tsv".

    The file is parsed as JSON; if that succeeds the answer is "json". Any
    JSON decoding failure is taken to mean the file is a TSV. Errors raised
    while opening the file are not handled here and propagate to the caller.
    """
    with open(filepath, 'r') as handle:
        try:
            json.load(handle)
        except json.JSONDecodeError:
            return "tsv"
    return "json"
    
def determine_program(data: Dict[str, Any]) -> str:
    """
    Decide which program the input parameters are meant for.

    Returns "prediction" (phbr.py) when only the 'mhc-predictions' key is
    present, "mhc2phbr" (mhc2phbr.py) when only mhc2phbr-specific keys are
    present, and an "Error: ..." string when the keys are mixed or absent.
    """
    # Keys that only make sense for 'mhc2phbr.py'
    mhc2phbr_only_keys = (
        'mhc-predictions-result-uri',
        'sequence-mutation-position-colname',
        'mutation-position',
        'mhc-peptide-tsv',
        'mhc-sequence-tsv',
    )

    wants_prediction = 'mhc-predictions' in data
    wants_mhc2phbr = False
    for key in mhc2phbr_only_keys:
        if key in data:
            wants_mhc2phbr = True
            break

    if wants_prediction and wants_mhc2phbr:
        return "Error: Input JSON contains parameters for both programs. Please provide separate inputs."
    if wants_prediction:
        return "prediction"  # Run phbr.py
    if wants_mhc2phbr:
        return "mhc2phbr"  # Run mhc2phbr.py
    return "Error: Insufficient parameters to determine program."


def determine_tcell_class(peptide_file: str) -> Optional[TCellClass]:
    """
    Infer the MHC class of a peptide TSV from its column names.

    Args:
        peptide_file: Path to a tab-separated file with a header row.

    Returns:
        ``TCellClass.I`` if any column name contains "netmhcpan",
        ``TCellClass.II`` if any column name contains "netmhciipan",
        otherwise ``None``. Class I wins when both are present.
    """
    # Only the header is needed, so skip parsing the data rows (nrows=0).
    columns = pd.read_csv(peptide_file, sep='\t', nrows=0).columns.tolist()

    if any("netmhcpan" in col for col in columns):
        return TCellClass.I
    if any("netmhciipan" in col for col in columns):
        return TCellClass.II
    return None


def mhc_binding_result_json2tsv(params_dir: Path, t_cell_class: TCellClass, is_aggregated_result: bool = False) -> Optional[Union[Path, str]]:
    """
    Convert the 'peptide_table' of an MHC binding result JSON into a TSV file.

    Args:
        params_dir: When ``is_aggregated_result`` is False, a path whose parent
            contains ``aggregate/aggregated_result.json``; the TSV is written
            next to it as ``aggregate/peptide_table.tsv``. When
            ``is_aggregated_result`` is True, the path of the result JSON
            itself; the TSV is then written to a new temporary file.
        t_cell_class: Selects which binding columns get renamed.
        is_aggregated_result: See ``params_dir``.

    Returns:
        The path of the written TSV (a ``Path`` for the default case, a ``str``
        for the temporary-file case), or ``None`` if the JSON holds no result
        whose ``result_type`` is ``"peptide_table"``.
    """
    aggregate_dir = params_dir.parent / 'aggregate'

    # The aggregated result may be passed in directly as a parameter.
    if is_aggregated_result:
        result_json_file = params_dir
    else:
        result_json_file = aggregate_dir / 'aggregated_result.json'

    with open(result_json_file, 'r') as f:
        data = json.load(f)

    # Column name in the JSON -> header used in the TSV.
    header_mapping = {
        "core.sequence_number": "seq #",
        "core.peptide": "peptide",
        "core.start": "start",
        "core.end": "end",
        "core.length": "peptide length",
        "core.allele": "allele",
        "core.peptide_index": "peptide index",
    }

    if t_cell_class.value == 'i':
        header_mapping.update({
            "binding.median_percentile": "median binding percentile",
            "binding.netmhcpan_el.core": "netmhcpan_el core",
            "binding.netmhcpan_el.icore": "netmhcpan_el icore",
            "binding.netmhcpan_el.score": "netmhcpan_el score",
            "binding.netmhcpan_el.percentile": "netmhcpan_el percentile",
        })
    elif t_cell_class.value == 'ii':
        header_mapping.update({
            "binding.median_percentile": "median binding percentile",
            "binding.netmhciipan_el.core": "netmhciipan_el core",
            "binding.netmhciipan_el.score": "netmhciipan_el score",
            "binding.netmhciipan_el.percentile": "netmhciipan_el percentile",
        })

    # Find the peptide_table result
    peptide_table = next(
        (res for res in data["results"] if res["result_type"] == "peptide_table"),
        None
    )

    if peptide_table is None:
        print("No peptide_table found in the JSON data.")
        return None

    # Unmapped columns keep their original name.
    custom_headers = [header_mapping.get(col, col) for col in peptide_table["table_columns"]]

    if is_aggregated_result:
        # Reserve a persistent temp path and close the handle right away so it
        # is not leaked; the file is reopened for writing below.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.tsv', delete=False) as tmp_file:
            new_result_file = tmp_file.name
    else:
        new_result_file = aggregate_dir / 'peptide_table.tsv'

    with open(new_result_file, 'w', newline='') as f_out:
        writer = csv.writer(f_out, delimiter='\t')
        writer.writerow(custom_headers)
        writer.writerows(peptide_table["table_data"])
    print(f'successfully written to {new_result_file}')

    return new_result_file


def create_peptide_and_sequence_files(df: pd.DataFrame) -> Tuple[str, str]:
    """
    Dump the peptide table and the input sequence table to temporary TSVs.

    ``df`` is indexed by table name ('peptide_table' and
    'input_sequence_table'); in each table the 'sequence_number' column is
    written out as 'seq #'. The temporary files are created with
    ``delete=False`` and therefore remain on disk after this call.

    Returns:
        ``(peptide_table_path, sequence_table_path)``
    """
    written_paths = []

    for table_name in ('peptide_table', 'input_sequence_table'):
        with tempfile.NamedTemporaryFile(mode='w+', suffix='.tsv', delete=False) as handle:
            table = df[table_name].rename(columns={'sequence_number': 'seq #'})
            table.to_csv(handle.name, index=False, sep="\t")
            written_paths.append(handle.name)

    peptide_table_path, sequence_table_path = written_paths
    return peptide_table_path, sequence_table_path


def uri_to_df(uri: str) -> pd.DataFrame:
    """
    Save the result behind ``uri`` to a file and load its tables.

    Requests the 'peptide_table' and 'input_sequence_table' tables from
    ``common.api_results_json_to_df`` and returns whatever that helper yields.
    """
    downloaded_file = common.save_file_from_URI(uri)
    return common.api_results_json_to_df(
        downloaded_file,
        table_types=['peptide_table', 'input_sequence_table'],
    )


def format_message(message: str) -> str:
    """
    Flatten a (possibly indented, multi-line) message into a single line.

    Leading/trailing whitespace is removed and every run of whitespace,
    including the newlines and indentation of a triple-quoted literal, is
    collapsed to one space.

    The previous implementation wrapped the text at 80 columns and then deleted
    the newlines, which glued together the words on either side of every wrap
    point for messages longer than 80 characters.
    """
    return ' '.join(message.split())


def remove_empty_rank_rows(mhc_pred_file: str) -> None:
    """
    Rewrite a TSV in place without the rows whose 'rank' value is '-'.

    The remaining rows are re-indexed and written back to the same path,
    tab-separated and without an index column.
    """
    table = pd.read_csv(mhc_pred_file, sep='\t')
    has_rank = table['rank'] != '-'
    kept_rows = table[has_rank].reset_index(drop=True)
    kept_rows.to_csv(mhc_pred_file, sep='\t', index=False)


def mhci_to_phbr(data: Dict[str, Any], parser: PhbrArgumentParser) -> None:
    """
    Run the MHC class I -> PHBR pipeline described by ``data``.

    The peptide/sequence tables come from the first applicable source below;
    a later source overrides an earlier one:

    * ``data['mhc_peptide_tsv']`` / ``data['mhc_sequence_tsv']`` as given,
    * ``data['mhc_result_uri']``: fetched and split into temporary TSVs,
    * ``data['mhc_peptide_json']``: converted into a temporary TSV,
    * a fresh MHC binding run when ``preprocess.needs_mhc_binding(data)``.

    ``mhc2phbr.py`` then converts the peptide table into PHBR input and
    ``phbr.py --mhci`` writes the result to ``data['output_file']``. A failure
    of either subprocess is printed, not raised; if ``mhc2phbr.py`` fails,
    ``phbr.py`` is not run.

    Args:
        data: Parsed input parameters; must contain the key ``'class_i'``.
        parser: Supplies ``PROJECT_ROOT_PATH`` and ``find_file_path``.

    Raises:
        ValueError: If no peptide table or no sequence table is available.
        KeyError: If ``mhc2phbr.py`` or ``phbr.py`` cannot be located.
    """
    peptide_table_path = data.get('mhc_peptide_tsv')
    sequence_table_path = data.get('mhc_sequence_tsv')
    mut_pos_col = data.get('mutation_position_colname')
    mut_pos = data.get('mutation_position')
    output_file = data.get('output_file')
    mhci_config = data['class_i']
    root_path = parser.PROJECT_ROOT_PATH

    # NOTE: If 'alleles' is provided, its keys replace 'homozygous_loci'
    homozygous_loci = mhci_config.get('homozygous_loci')
    alleles = mhci_config.get('alleles')
    if alleles:
        homozygous_loci = ','.join(alleles.keys())

    # CASE 1: User provides a URI for the MHCI result.
    if 'mhc_result_uri' in data:
        df = uri_to_df(data['mhc_result_uri'])
        peptide_table_path, sequence_table_path = create_peptide_and_sequence_files(df)

        # Extract unique values from the 'allele' column
        unique_alleles = list(df['peptide_table']['allele'].unique())

        method = mhci_config.get('prediction_method', {}).get('method', 'netmhcpan_el')
        valid_alleles, _invalid_alleles = validators.validate_alleles(
            unique_alleles, class_type=TCellClass.I, method=method
        )

        if not valid_alleles:
            # Warn when none of the alleles are valid for MHCI
            print("No valid alleles found. Skipping MHCI binding prediction.")
            return

    # CASE 1.1: User provides a JSON file for the MHCI result.
    # * Most likely used when aggregated result is passed in as a parameter.
    if 'mhc_peptide_json' in data:
        peptide_table_path = mhc_binding_result_json2tsv(
            Path(data['mhc_peptide_json']), TCellClass.I, is_aggregated_result=True
        )
        sequence_table_path = data['mhc_sequence_tsv']

    # CASE 2: MHCI binding prediction needs to be run.
    # * User must provide "mhc-sequence-tsv" in the input JSON file.
    # * User must provide "alleles" in the input JSON file.
    if preprocess.needs_mhc_binding(data):
        print('Running MHC binding...')
        mhc_binding_output_dir = preprocess.run_mhc_binding(data, TCellClass.I)
        peptide_table_path = mhc_binding_result_json2tsv(mhc_binding_output_dir, TCellClass.I)

    # Both tables are passed to mhc2phbr.py; fail early with a clear message
    # instead of letting subprocess reject a None argument.
    if peptide_table_path is None:
        raise ValueError("Peptide table path is None.")
    if sequence_table_path is None:
        raise ValueError("Sequence table path is None.")

    # ------------------------------------------------------------------
    # Convert MHCI binding result to PHBR input format via mhc2phbr.py.
    # ------------------------------------------------------------------
    mhc2phbr_fpath = parser.find_file_path(start_dir=root_path, filename='mhc2phbr.py')
    if not mhc2phbr_fpath:
        message = """
            mhc2phbr.py not found. Please check if mhc2phbr.py exists
            in this project.
        """
        raise KeyError(format_message(message))

    # Reserve a persistent temp path; close the handle immediately so it is
    # not kept open while the child process writes to the file.
    with tempfile.NamedTemporaryFile(prefix="phbr-input-", delete=False) as tmp_file:
        mhc_pred_file = tmp_file.name

    # Specify --rank-colname
    # NOTE: the first header column containing both the configured method and
    #       'percentile' is used; otherwise fall back to 'percentile'.
    header = pd.read_csv(peptide_table_path, sep='\t', nrows=0).columns.tolist()
    method = mhci_config.get('prediction_method', {}).get('method', '')
    matching_cols = [
        col for col in header
        if method in col.lower() and 'percentile' in col.lower()
    ]
    rank_colname = matching_cols[0] if matching_cols else 'percentile'

    mhc_args = [
        '--peptide-output', str(peptide_table_path),
        '--sequence-output', sequence_table_path,
        '--phbr-input', mhc_pred_file,
        '--rank-colname', rank_colname,
    ]

    # NOTE: If neither is specified, it will use the central position
    if mut_pos_col:
        mhc_args += ['--sequence-mutation-position-colname', mut_pos_col]
    elif mut_pos:
        # JSON may deliver the position as a number; argv entries must be str.
        mhc_args += ['--mutation-position', str(mut_pos)]

    command = ['python', mhc2phbr_fpath] + mhc_args

    try:
        print('--------------------------------------------------')
        command_str = ' '.join(str(x) for x in command)
        print("Running command for mhc2phbr.py:", command_str)
        print('--------------------------------------------------')
        subprocess.run(command, capture_output=True, text=True, check=True)
        # validators.validate_mhc2phbr_output(mhc_pred_file)
        remove_empty_rank_rows(mhc_pred_file)

        print(f'Result of \'mhc2phbr.py\' saved to {mhc_pred_file}')
    except subprocess.CalledProcessError as e:
        print('Error:', e.stderr)
        print('Return Code:', e.returncode)
        # Without a valid PHBR input there is nothing for phbr.py to process.
        return

    # ------------------------------------------------------------------
    # Use the output of 'mhc2phbr.py' (previous step) as input to PHBR.
    # ------------------------------------------------------------------
    phbr_fpath = parser.find_file_path(start_dir=root_path, filename='phbr.py')
    if not phbr_fpath:
        message = """
            phbr.py not found. Please check if phbr.py exists 
            in this project.
        """
        raise KeyError(format_message(message))

    phbr_args = [
        '--mhc-predictions', mhc_pred_file,
        '--output-file', output_file,
        '--mhci'
    ]

    if homozygous_loci:
        phbr_args += ['--homozygous-loci', homozygous_loci]

    command = ['python', phbr_fpath] + phbr_args

    try:
        print('--------------------------------------------------')
        print("Running command:", command)
        print('--------------------------------------------------')
        subprocess.run(command, capture_output=True, text=True, check=True)
        print(f'Final result of saved to {output_file}')
    except subprocess.CalledProcessError as e:
        print('Error:', e.stderr)
        print('Return Code:', e.returncode)



def mhcii_to_phbr(data: Dict[str, Any], parser: PhbrArgumentParser) -> None:
    """
    Run the MHC class II -> PHBR pipeline described by ``data``.

    The peptide/sequence tables come from the first applicable source below;
    a later source overrides an earlier one:

    * ``data['mhc_peptide_tsv']`` / ``data['mhc_sequence_tsv']`` as given,
    * ``data['mhc_result_uri']``: fetched and split into temporary TSVs,
    * ``data['mhc_peptide_json']``: converted into a temporary TSV,
    * a fresh MHC binding run when ``preprocess.needs_mhc_binding(data)``.

    ``mhc2phbr.py`` then converts the peptide table into PHBR input and
    ``phbr.py --mhcii`` writes the result to ``data['output_file']``. A failure
    of either subprocess is printed, not raised; if ``mhc2phbr.py`` fails,
    ``phbr.py`` is not run.

    Args:
        data: Parsed input parameters; must contain the key ``'class_ii'``.
        parser: Supplies ``PROJECT_ROOT_PATH`` and ``find_file_path``.

    Raises:
        ValueError: If no peptide table or no sequence table is available.
        KeyError: If ``mhc2phbr.py`` or ``phbr.py`` cannot be located.
    """
    peptide_table_path = data.get('mhc_peptide_tsv')
    sequence_table_path = data.get('mhc_sequence_tsv')
    mut_pos_col = data.get('mutation_position_colname')
    mut_pos = data.get('mutation_position')
    output_file = data.get('output_file')
    mhcii_config = data['class_ii']
    root_path = parser.PROJECT_ROOT_PATH

    # NOTE: If 'alleles' is provided, its keys replace 'homozygous_loci'
    homozygous_loci = mhcii_config.get('homozygous_loci')
    alleles = mhcii_config.get('alleles')
    if alleles:
        homozygous_loci = ','.join(alleles.keys())

    # CASE 1: User provides a URI for the MHCII result.
    if 'mhc_result_uri' in data:
        df = uri_to_df(data['mhc_result_uri'])
        peptide_table_path, sequence_table_path = create_peptide_and_sequence_files(df)

        # Extract unique values from the 'allele' column
        unique_alleles = list(df['peptide_table']['allele'].unique())

        method = mhcii_config.get('prediction_method', {}).get('method', 'netmhciipan_el')
        valid_alleles, _invalid_alleles = validators.validate_alleles(
            unique_alleles, class_type=TCellClass.II, method=method
        )

        if not valid_alleles:
            # Warn when none of the alleles are valid for MHCII
            print("No valid alleles found. Skipping MHCII binding prediction.")
            return

    # CASE 1.1: User provides a JSON file for the MHCII result.
    # * Most likely used when aggregated result is passed in as a parameter.
    if 'mhc_peptide_json' in data:
        peptide_table_path = mhc_binding_result_json2tsv(
            Path(data['mhc_peptide_json']), TCellClass.II, is_aggregated_result=True
        )
        sequence_table_path = data['mhc_sequence_tsv']

    # CASE 2: MHCII binding prediction needs to be run.
    # * User must provide "mhc-sequence-tsv" in the input JSON file.
    # * User must provide "alleles" in the input JSON file.
    if preprocess.needs_mhc_binding(data):
        mhc_binding_output_dir = preprocess.run_mhc_binding(data, TCellClass.II)
        peptide_table_path = mhc_binding_result_json2tsv(mhc_binding_output_dir, TCellClass.II)
        print(f"Peptide Table Path: {peptide_table_path}")

    # Both tables are passed to mhc2phbr.py; fail early with a clear message
    # instead of letting subprocess reject a None argument.
    if peptide_table_path is None:
        raise ValueError("Peptide table path is None.")
    if sequence_table_path is None:
        raise ValueError("Sequence table path is None.")

    # ------------------------------------------------------------------
    # Convert MHCII binding result to PHBR input format via mhc2phbr.py.
    # ------------------------------------------------------------------
    mhc2phbr_fpath = parser.find_file_path(start_dir=root_path, filename='mhc2phbr.py')
    if not mhc2phbr_fpath:
        message = """
            mhc2phbr.py not found. Please check if mhc2phbr.py exists
            in this project.
        """
        raise KeyError(format_message(message))

    # Reserve a persistent temp path; close the handle immediately so it is
    # not kept open while the child process writes to the file.
    with tempfile.NamedTemporaryFile(prefix="phbr-input-", delete=False) as tmp_file:
        mhc_pred_file = tmp_file.name

    # Specify --rank-colname
    # NOTE: the first header column containing both the configured method and
    #       'percentile' is used; otherwise fall back to 'percentile'.
    header = pd.read_csv(peptide_table_path, sep='\t', nrows=0).columns.tolist()
    method = mhcii_config.get('prediction_method', {}).get('method', '')
    matching_cols = [
        col for col in header
        if method in col.lower() and 'percentile' in col.lower()
    ]
    rank_colname = matching_cols[0] if matching_cols else 'percentile'

    mhc_args = [
        '--peptide-output', str(peptide_table_path),
        '--sequence-output', sequence_table_path,
        '--phbr-input', mhc_pred_file,
        '--rank-colname', rank_colname,
    ]

    # NOTE: If neither is specified, it will use the central position
    if mut_pos_col:
        mhc_args += ['--sequence-mutation-position-colname', mut_pos_col]
    elif mut_pos:
        # JSON may deliver the position as a number; argv entries must be str.
        mhc_args += ['--mutation-position', str(mut_pos)]

    command = ['python', mhc2phbr_fpath] + mhc_args

    try:
        print('--------------------------------------------------')
        command_str = ' '.join(str(x) for x in command)
        print("Running command for mhc2phbr.py:", command_str)
        print('--------------------------------------------------')
        subprocess.run(command, capture_output=True, text=True, check=True)
        # validators.validate_mhc2phbr_output(mhc_pred_file)
        remove_empty_rank_rows(mhc_pred_file)

        print(f'Result of \'mhc2phbr.py\' saved to {mhc_pred_file}')
    except subprocess.CalledProcessError as e:
        print('Error:', e.stderr)
        print('Return Code:', e.returncode)
        # Without a valid PHBR input there is nothing for phbr.py to process.
        return

    # ------------------------------------------------------------------
    # Use the output of 'mhc2phbr.py' (previous step) as input to PHBR.
    # ------------------------------------------------------------------
    phbr_fpath = parser.find_file_path(start_dir=root_path, filename='phbr.py')
    if not phbr_fpath:
        message = """
            phbr.py not found. Please check if phbr.py exists 
            in this project.
        """
        raise KeyError(format_message(message))

    phbr_args = [
        '--mhc-predictions', mhc_pred_file,
        '--output-file', output_file,
        '--mhcii'
    ]

    if homozygous_loci:
        phbr_args += ['--homozygous-loci', homozygous_loci]

    command = ['python', phbr_fpath] + phbr_args

    try:
        print('--------------------------------------------------')
        print("Running command:", command)
        print('--------------------------------------------------')
        subprocess.run(command, capture_output=True, text=True, check=True)
        print(f'Final result of saved to {output_file}')
    except subprocess.CalledProcessError as e:
        print('Error:', e.stderr)
        print('Return Code:', e.returncode)



def combine_mhc_results(mhci_output: str, mhcii_output: str, output_file: str) -> None:
    """
    Merge the MHCI and MHCII PHBR tables into one TSV keyed by peptide.

    The two inputs are outer-joined on 'peptide'; their 'PHBR' columns become
    'PHBR-I' and 'PHBR-II'. The output always has the columns
    peptide, #A, #B, #C, #DP, #DQ, #DR, PHBR-I, PHBR-II, with absent columns
    and missing values written as 0.0.

    Args:
        mhci_output: Path to MHCI output file
        mhcii_output: Path to MHCII output file
        output_file: Path to combined output file
    """
    print('--------------------------------------------------')
    print("Combining MHCI and MHCII results...")

    class_i = pd.read_csv(mhci_output, sep='\t')
    class_ii = pd.read_csv(mhcii_output, sep='\t')

    print(f"MHCI columns: {class_i.columns.tolist()}")
    print(f"MHCII columns: {class_ii.columns.tolist()}")

    # Distinguish the two PHBR scores, then join on the peptide; only MHCII
    # columns receive a suffix when a name clashes.
    merged = class_i.rename(columns={'PHBR': 'PHBR-I'}).merge(
        class_ii.rename(columns={'PHBR': 'PHBR-II'}),
        on=['peptide'],
        how='outer',
        suffixes=('', '-II'),
    )

    locus_columns = ['#A', '#B', '#C', '#DP', '#DQ', '#DR']  # MHCI loci, then MHCII loci
    final_columns = ['peptide', *locus_columns, 'PHBR-I', 'PHBR-II']

    # Any expected column that the merge did not produce is filled with 0.0
    absent = [name for name in final_columns if name not in merged.columns]
    for name in absent:
        merged[name] = 0.0

    ordered = merged[final_columns].fillna(0.0)
    ordered.to_csv(output_file, sep='\t', index=False)


def replace_extension_with_json(filepath: str) -> str:
    """
    Return ``filepath`` with its final extension swapped for ``.json``.

    Args:
        filepath: Path to the file

    Returns:
        The same path as a string, ending in .json (a path without an
        extension simply gets .json appended)
    """
    json_path = Path(filepath).with_suffix('.json')
    return str(json_path)


def main() -> None:
    """
    Command-line entry point; dispatches on ``args.subcommand``.

    * ``predict``: loads the input JSON, runs the class I and/or class II PHBR
      pipeline depending on which of ``'class_i'`` / ``'class_ii'`` the JSON
      contains, and writes ``<output_prefix>.tsv``. When the output format is
      'json' the TSV is additionally passed through ``postprocess.df_to_json``
      and ``postprocess.save_json_to_file``. Exits with status 1 if no output
      file was produced.
    * ``preprocess`` / ``postprocess``: delegate to the respective module's
      ``run`` and exit.

    Raises:
        KeyError: If ``--output-prefix`` was not supplied for ``predict``.
    """
    parser = PhbrArgumentParser()
    args = parser.parse_args()

    if args.subcommand == 'predict':
        # Load the JSON file
        data = json.load(args.input_json)

        if not args.output_prefix:
            message = """
                Invalid JSON format. The input JSON must specify the parameter
                '--output-prefix'/'--output-format' to specify the output file.
            """
            raise KeyError(format_message(message))

        # NOTE: The pipeline always produces a TSV first; conversion to JSON,
        #       when requested, happens towards the end of the script.
        needs_json_conversion = args.output_format == 'json'
        output_file = args.output_prefix + '.tsv'

        final_output_file = data['output_file'] = output_file

        # Add metadata
        data['metadata'] = {
            'subcommand': args.subcommand,
            'root_path': parser.PROJECT_ROOT_PATH,
        }

        # NOTE: Turn "input_neoepitopes" into "mhc_sequence_tsv"
        if 'input_neoepitopes' in data:
            df = pd.read_csv(StringIO(data['input_neoepitopes']), sep='\t')
            # Every column whose name mentions 'peptide' is renamed to 'sequence'
            df = df.rename(columns=lambda x: 'sequence' if 'peptide' in x.lower() else x)
            df.insert(0, 'seq #', range(1, len(df) + 1))

            # Create a temporary file
            with tempfile.NamedTemporaryFile(mode='w+', suffix='.tsv', delete=False) as tmp_file:
                df.to_csv(tmp_file.name, index=False, sep='\t')
                print(f"Temporary file created at: {tmp_file.name}")

            data['mhc_sequence_tsv'] = tmp_file.name

        # ================================
        # Handle combined case
        # ================================
        if 'class_i' in data and 'class_ii' in data:
            print("Processing combined MHCI and MHCII input...")
            # Create (empty) temporary files for the individual outputs; the
            # handles are closed right away so they are not leaked.
            with tempfile.NamedTemporaryFile(prefix="phbr-mhci-output-", suffix=".tsv", delete=False) as tmp_out:
                tmp_phbr_mhci_output = tmp_out.name
            with tempfile.NamedTemporaryFile(prefix="phbr-mhcii-output-", suffix=".tsv", delete=False) as tmp_out:
                tmp_phbr_mhcii_output = tmp_out.name

            data['output_file'] = tmp_phbr_mhci_output
            mhci_to_phbr(data, parser)
            # NOTE: This will be the output file of mhci_to_phbr
            phbr_mhci_output = data['output_file']
            print(f"PHBR output saved to {phbr_mhci_output}")

            data['output_file'] = tmp_phbr_mhcii_output
            mhcii_to_phbr(data, parser)
            phbr_mhcii_output = data['output_file']
            print(f"PHBR output saved to {phbr_mhcii_output}")

            data['output_file'] = final_output_file
            print(f"Final output file: {data['output_file']}")

            # A class whose temp file is still empty produced no result.
            mhci_has_result = os.path.getsize(phbr_mhci_output) > 0
            mhcii_has_result = os.path.getsize(phbr_mhcii_output) > 0

            if mhci_has_result and mhcii_has_result:
                combine_mhc_results(phbr_mhci_output, phbr_mhcii_output, final_output_file)
                print(f"Combined results saved to: {final_output_file}")
                postprocess.filter_output_file(final_output_file, TCellClass.I)
                postprocess.filter_output_file(final_output_file, TCellClass.II)
            elif mhcii_has_result:
                postprocess.filter_output_file(phbr_mhcii_output, TCellClass.II)
                # Need to copy the content of phbr_mhcii_output to final_output_file
                shutil.copy(phbr_mhcii_output, final_output_file)
            elif mhci_has_result:
                postprocess.filter_output_file(phbr_mhci_output, TCellClass.I)
                # Need to copy the content of phbr_mhci_output to final_output_file
                shutil.copy(phbr_mhci_output, final_output_file)
            # When neither class produced a result nothing is written here; the
            # existence check on final_output_file below handles that case.

        # ================================
        # Handle all types of MHCI-related input for PHBR
        # ================================
        elif 'class_i' in data:
            mhci_to_phbr(data, parser)

            # NOTE: mhci_to_phbr may return without producing an output file
            # (e.g. when no valid alleles are found). Only proceed with the
            # rest of the script if the output file exists.
            if data['output_file'] and os.path.exists(data['output_file']):
                print(f"PHBR output saved to {data['output_file']}")
                postprocess.filter_output_file(data['output_file'], TCellClass.I)
                if data['output_file'] != final_output_file:
                    shutil.copy(data['output_file'], final_output_file)

        # ================================
        # Handle all types of MHCII-related input for PHBR
        # ================================
        elif 'class_ii' in data:
            mhcii_to_phbr(data, parser)

            # NOTE: mhcii_to_phbr may return without producing an output file
            # (e.g. when no valid alleles are found). Only proceed with the
            # rest of the script if the output file exists.
            if data['output_file'] and os.path.exists(data['output_file']):
                print(f"PHBR output saved to {data['output_file']}")
                postprocess.filter_output_file(data['output_file'], TCellClass.II)
                if data['output_file'] != final_output_file:
                    shutil.copy(data['output_file'], final_output_file)

        print(f"Final output file: {final_output_file}")

        # if final_output_file does not exist, then terminate the script
        if not os.path.exists(final_output_file):
            print("No output file found. Terminating script.")
            sys.exit(1)

        if needs_json_conversion:
            df = pd.read_csv(final_output_file, sep='\t')
            json_result = postprocess.df_to_json(df)
            # NOTE(review): the JSON is saved under the '.tsv' path; confirm
            # whether save_json_to_file adjusts the extension itself or whether
            # replace_extension_with_json() was meant to be applied here.
            postprocess.save_json_to_file(json_result, final_output_file)

    if args.subcommand == 'preprocess':
        # Validate Arguments
        parser.validate_args(args)

        # NOTE: For PHBR, we do not need to split as we expect PHBR inputs to be small.
        # MHCI and MHCII will perform their own split, however.
        # Handling all job_description file creation in the preprocess file.
        preprocess.run(parser, **vars(args))
        sys.exit()

    if args.subcommand == 'postprocess':
        # Validate Arguments
        # parser.validate_args(args)

        postprocess.run(**vars(args))
        sys.exit()

# Run the CLI only when this file is executed as a script, not when imported.
if __name__=='__main__':
    main()