from PepxParser import PepXArgumentParser
import json
import sys
import argparse
import pandas as pd
# from pepx_argparser import PepxArgumentParser
from split import split_parameters_file
from aggregation import aggregate_result_file


def export_result_from_aggregate(**kwargs):
    """Write the collapsed and expanded tables to one JSON file.

    Keyword Args:
        output_format: Suffix appended (after a dot) to the output path.
        output_file: Object whose ``name`` attribute is the output path stem.
        result: Mapping with the ``collapsed`` / ``expanded`` table data and
            the JSON-encoded ``collapsed_header_map`` /
            ``expanded_header_map`` strings.

    The content written is always JSON, whatever ``output_format`` is; the
    format only determines the file suffix.
    """
    extension = kwargs['output_format']
    target = kwargs['output_file']
    aggregated = kwargs['result']

    def prefixed_columns(map_key):
        # The header map is a JSON object; its values are used as column
        # names once namespaced with the 'pepx.' prefix.
        names = json.loads(aggregated[map_key]).values()
        return ['pepx.' + name for name in names]

    collapsed_columns = prefixed_columns('collapsed_header_map')
    expanded_columns = prefixed_columns('expanded_header_map')

    # One entry per table; "type" is either "collapsed" or "expanded".
    tables = [
        {
            "type": "collapsed",
            "table_columns": collapsed_columns,
            "table_data": aggregated['collapsed'],
        },
        {
            "type": "expanded",
            "table_columns": expanded_columns,
            "table_data": aggregated['expanded'],
        },
    ]
    payload = json.dumps({"warnings": [], "results": tables})

    destination = target.name + '.' + extension
    with open(destination, 'w') as handle:
        handle.write(payload)


def format_to_json_result(result):
    """Serialize the collapsed and expanded tables into one JSON string.

    Both tables are always included. The column names of each table are the
    values of the corresponding JSON-encoded header map stored in ``result``.

    Args:
        result: Mapping with ``collapsed`` / ``expanded`` table data and the
            ``collapsed_header_map`` / ``expanded_header_map`` JSON strings.

    Returns:
        A JSON string with an empty ``warnings`` list and a ``results`` list
        holding the ``peptide_table`` and ``peptide_gene_table`` entries.
    """
    def table_entry(table_type, map_key, data_key):
        columns = list(json.loads(result[map_key]).values())
        return {
            'method': 'pepx',
            'type': table_type,
            'table_columns': columns,
            'table_data': result[data_key],
        }

    tables = [
        table_entry('peptide_table', 'collapsed_header_map', 'collapsed'),
        table_entry('peptide_gene_table', 'expanded_header_map', 'expanded'),
    ]
    return json.dumps({'warnings': [], 'results': tables})



def display_or_export_result(**kwargs):
    """Write the result to ``output_file`` or, without one, print it.

    Keyword Args:
        output_format: ``'csv'``, ``'tsv'`` or ``'json'``; any other value
            makes this function do nothing.
        output_file: Destination path; a falsy value selects printing.
        pg_summary: Truthy selects the collapsed table, otherwise the
            expanded table is used (csv/tsv only).
        result: For csv/tsv, a mapping with the table data and JSON-encoded
            header maps; for json, the already serialized string.

    For csv/tsv the column titles are the keys of the selected header map.
    Exported files keep the DataFrame row index as their first column
    (``to_csv`` default), while the printed table omits it.
    """
    fmt = kwargs['output_format']
    destination = kwargs['output_file']
    summarize = kwargs['pg_summary']
    payload = kwargs['result']

    if fmt == 'json':
        # The JSON payload is emitted verbatim.
        if destination:
            with open(destination, 'w') as handle:
                handle.write(payload)
        else:
            print(payload)
        return

    if fmt not in ('csv', 'tsv'):
        return

    if summarize:
        map_key, table_key = 'collapsed_header_map', 'collapsed'
    else:
        # Default is the expanded result.
        map_key, table_key = 'expanded_header_map', 'expanded'

    column_titles = list(json.loads(payload[map_key]).keys())
    rows = payload[table_key]

    if destination:
        delimiter = ',' if fmt == 'csv' else '\t'
        frame = pd.DataFrame(rows)
        frame.to_csv(destination, header=column_titles, sep=delimiter)
    else:
        # Both csv and tsv are shown as the same plain-text table.
        frame = pd.DataFrame(rows, columns=column_titles)
        print(frame.to_string(index=False))

def _display_headers(qlevel):
    """Return ``(collapsed, expanded)`` display headers for ``qlevel``."""
    if qlevel == 'gene':
        collapsed = [
            "Peptide",
            "Gene ENSG ID",
            "Gene Symbol",
            "Proteins Encoded by Gene",
            "Proteins Containing Peptide",
            "Fraction of Matching Proteins",
            "Mean Occurrences per Protein",
            "Gene TPM",
            "Peptide TPM",
            "Scaled Peptide TPM",
        ]
        expanded = [
            "Peptide",
            "Total Peptide TPM",
            "Median Peptide TPM",
            "Max Peptide TPM",
            "Total Scaled Peptide TPM",
            "Median Scaled Peptide TPM",
            "Max Scaled Peptide TPM",
            "Total Gene TPM",
            "Median Gene TPM",
            "Max Gene TPM",
            "Gene Symbols",
            "Gene ENSG IDs",
            "Gene TPMs",
            "Peptide TPMs",
            "Scaled Peptide TPMs",
            "Proteins Encoded by Gene",
            "Proteins Containing Peptide (per Gene)",
            "Fraction of Proteins Containing Peptide (per Gene)",
            "Gene Mean Occurrences per Protein",
        ]
        return collapsed, expanded

    if qlevel == 'transcript':
        collapsed = [
            "Peptide",
            "Gene ENSG ID",
            "Protein ENSP ID",
            "Transcript ENST ID",
            "Gene Symbol",
            "Number of Occurences",
            "Transcript TPM",
            "Peptide TPM",
        ]
        expanded = [
            "Peptide",
            "Total Peptide TPM",
            "Median Peptide TPM",
            "Max Peptide TPM",
            "Total Transcript TPM",
            "Median Transcript TPM",
            "Max Transcript TPM",
            "Number of Genes",
            "Number of Transcripts",
            "Gene Symbols",
            "Gene ENSG IDs",
            "Transcript ENST IDs",
            "Protein ENSP IDs",
            "Number of Transcript Occurences",
            "Transcript TPMs",
            "Transcript Peptide TPMs",
        ]
        return collapsed, expanded

    # Any other level has no display names, so the header maps stay empty.
    return [], []


def _with_unmapped_rows(frame, columns, peptides):
    """Return ``frame`` with one '-' placeholder row per unmapped peptide."""
    rows = []
    for peptide in peptides:
        row = ['-'] * len(columns)
        row[0] = peptide
        rows.append(row)
    if not rows:
        return frame
    # A single concat avoids re-copying the growing frame for every peptide.
    placeholders = pd.DataFrame(rows, columns=columns)
    return pd.concat([frame, placeholders], ignore_index=True)


def process(**kwargs):
    """Look up peptides and package both result tables for display/export.

    Keyword Args:
        sequences: Peptide sequences passed to ``database.peptide_lookup``.
        qlevel: Quantification level; ``'gene'`` and ``'transcript'`` have
            display headers defined, other values yield empty header maps.
        dataset_id: Dataset identifier passed to the lookup.
        database: Object providing ``peptide_lookup(sequences, dataset_id,
            qlevel)``, which must return a mapping with ``collapsed`` and
            ``expanded`` DataFrames (and ``unmapped`` when
            ``unmapped_peptides`` is truthy).
        unmapped_peptides: If truthy, append a placeholder row for every
            peptide listed under ``unmapped``.

    Returns:
        dict with ``collapsed`` / ``expanded`` (row lists),
        ``collapsed_json`` / ``expanded_json`` (column-oriented JSON of the
        same tables) and ``collapsed_header_map`` / ``expanded_header_map``
        (JSON objects mapping display header to original column name).

    Raises:
        ValueError: If the lookup returns no collapsed rows.
    """
    sequences = kwargs['sequences']
    qlevel = kwargs['qlevel']
    dataset_id = kwargs['dataset_id']
    pepx_db = kwargs['database']
    unmapped_peptides = kwargs['unmapped_peptides']

    peptide_dictionary = pepx_db.peptide_lookup(sequences, dataset_id, qlevel)

    # Work on local names; the lookup result itself is not written back to.
    collapsed_result = peptide_dictionary['collapsed']
    if len(collapsed_result) < 1:
        # Besides the sequences, the lookup only depends on the
        # quantification level and the dataset id. The original author notes
        # the level is validated earlier, so an empty result is attributed
        # to the dataset id.
        raise ValueError('Invalid dataset-id is provided.')
    expanded_result = peptide_dictionary['expanded']

    # Display names used for the header maps (and thus for downloaded files).
    collapsed_header, expanded_header = _display_headers(qlevel)

    # The first column is skipped positionally (presumably 'dataset_id',
    # which is dropped by name further down -- TODO confirm).
    collapsed_header_orig = list(collapsed_result.columns)[1:]
    expanded_header_orig = list(expanded_result.columns)[1:]

    if unmapped_peptides:
        # Add the unmapped peptides to both tables as rows filled with '-'.
        upeptides = list(peptide_dictionary['unmapped'])
        collapsed_result = _with_unmapped_rows(
            collapsed_result, collapsed_header_orig, upeptides)
        expanded_result = _with_unmapped_rows(
            expanded_result, expanded_header_orig, upeptides)

    # zip() pairs display names with original names by position.
    collapsed_header_map = dict(zip(collapsed_header, collapsed_header_orig))
    expanded_header_map = dict(zip(expanded_header, expanded_header_orig))

    # Columns that must not appear in the output.
    collapsed_result = collapsed_result.drop('dataset_id', axis=1)
    expanded_result = expanded_result.drop('dataset_id', axis=1)

    if qlevel == 'transcript':
        # Transcript ids are removed from both the tables and the maps.
        del collapsed_header_map['Transcript ENST ID']
        collapsed_result = collapsed_result.drop('transcript_enst_id', axis=1)

        del expanded_header_map['Transcript ENST IDs']
        expanded_result = expanded_result.drop('transcript_enst_ids', axis=1)

    result = {
        # Row lists, so that the result page can render them into a table.
        'collapsed': collapsed_result.values.tolist(),
        'expanded': expanded_result.values.tolist(),
        # JSON form of the same tables, used when downloading them.
        'collapsed_json': collapsed_result.to_json(orient='columns'),
        'expanded_json': expanded_result.to_json(orient='columns'),
        # Header information used when downloading the table.
        'collapsed_header_map': json.dumps(collapsed_header_map),
        'expanded_header_map': json.dumps(expanded_header_map),
    }

    return result



def main():
    """Run the PepX command-line workflow.

    Depending on the parsed arguments this either lists the available
    datasets, hands off to ``aggregate_result_file``, hands off to
    ``split_parameters_file``, or looks up the requested peptides and
    displays/exports the result. The list, aggregate and split modes
    terminate the process with ``sys.exit(0)``.

    Parameters come from the JSON file named by ``args.json_filename`` when
    it is given, otherwise from the command-line arguments themselves.
    """
    ex = PepXArgumentParser()
    args = ex.parser.parse_args()

    ex.check_required_arguments(args)

    # Retrieve values
    json_filename = args.json_filename
    list_datasets = args.list_datasets
    pg_summary = args.pg_summary
    output_format = args.output_format[0]

    # argparse normally defines the attribute even when the option is not
    # given (defaulting to None), so checking for its presence alone would
    # end up concatenating None with a string.
    output_stem = getattr(args, 'output_file', None)
    if output_stem is None:
        output_file = None
    else:
        output_file = output_stem + '.' + output_format

    # Split/Aggregate flags
    split_flag = args.split_parameters_flag
    aggregate_flag = args.aggregate_parameters_flag

    # The JSON parameters file is parsed once and reused further down.
    content = None
    database = None
    if json_filename:
        with open(json_filename, 'r') as f:
            content = json.load(f)
        ns = argparse.Namespace(db_path=content['database'])
        database = ex.get_database(ns)
    elif not aggregate_flag:
        # Aggregation alone does not open a database.
        database = ex.get_database(args)

    # Before processing any data, confirm if the user only wants to
    # display the available datasets.
    if list_datasets:
        qlevel = args.quant_level[0]
        datasource = args.datasource[0]
        ex.get_available_datasets(datasource, qlevel)
        sys.exit(0)

    if aggregate_flag:
        # Aggregate option values
        job_desc_file = args.job_desc_file
        aggregate_input_dir = args.aggregate_input_dir
        aggregate_result_dir = args.aggregate_result_dir

        aggregate_result_file(
            job_desc_file,
            aggregate_input_dir,
            aggregate_result_dir
            )

        sys.exit(0)

    # The datasource is optional; a missing or empty value leaves it as ''.
    # NOTE(review): it is not used below (the ex.get_dataset call it fed is
    # disabled) -- confirm whether it can be dropped.
    datasource = ''
    optional_lookup_errors = (AttributeError, KeyError, IndexError, TypeError)

    if json_filename:
        sequences = content['input_sequence_text'].split()
        qlevel = content['qlevel'][0]
        dataset_id = content['dataset_id'][0]
        try:
            datasource = content['datasource'][0]
        except optional_lookup_errors:
            pass
    else:
        sequences = ex.get_sequences(args)
        qlevel = args.quant_level[0]
        dataset_id = args.dataset_id[0]
        try:
            datasource = args.datasource[0]
        except optional_lookup_errors:
            pass

    if split_flag:
        # Split option values
        split_parameters_dir = args.split_parameters_dir
        split_inputs_dir = args.split_inputs_dir
        assume_valid_flag = args.assume_valid_flag

        split_parameters_file(
            json_filename,
            split_parameters_dir,
            split_inputs_dir,
            executable_file="run_pepx.py",
            assume_valid=assume_valid_flag
            )

        sys.exit(0)

    # The dataset is identified by its id only; users are not asked for a
    # dataset name directly.
    result = process(
        sequences=sequences,
        qlevel=qlevel,
        dataset_id=dataset_id,
        database=database,
        unmapped_peptides='')

    if output_format == 'json':
        result = format_to_json_result(result)

    display_or_export_result(output_format=output_format,
                             output_file=output_file,
                             pg_summary=pg_summary,
                             result=result)


# Run the command-line workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()