import os
import sys
import json
import pandas as pd


def find_root_dir(start_dir=None, anchor_files=None):
    """Find the app root directory by looking for known anchor files.

    Starting at ``start_dir`` and moving up one directory at a time
    (the filesystem root included), each level is searched in two steps:

    1. The directory itself is tested for any of the anchors. An anchor may
       be a file or a directory.
    2. Failing that, every directory below it is walked and the first one
       that contains an entry named like an anchor is returned.

    Note that step 2 walks entire subtrees, so a search that has to climb
    far up the tree can be slow.

    Args:
        start_dir: Directory to start from. Defaults to the current working
            directory.
        anchor_files: Name, or iterable of names, of files or directories
            that mark the root. Defaults to ``['LICENSE']``.

    Returns:
        The absolute path of the first directory found to contain an anchor,
        or ``None`` when no anchor is found anywhere.
    """
    if start_dir is None:
        start_dir = os.getcwd()  # Default to the current working directory

    if anchor_files is None:
        anchor_files = ['LICENSE']
    elif isinstance(anchor_files, str):
        # A bare string would otherwise be iterated character by character.
        anchor_files = [anchor_files]
    # Materialize once so one-shot iterables survive the repeated passes below.
    anchors = tuple(anchor_files)

    # Normalize to absolute path
    current_dir = os.path.abspath(start_dir)
    # Name of the child directory we just climbed out of; its subtree has
    # already been searched, so it is skipped when walking its parent.
    searched_child = None

    while True:
        # Check for anchor files in the current directory
        for anchor in anchors:
            candidate = os.path.join(current_dir, anchor)
            if os.path.isfile(candidate) or os.path.isdir(candidate):
                return current_dir

        # Check for anchor files in child directories
        for root, dirs, files in os.walk(current_dir):
            if any(anchor in dirs or anchor in files for anchor in anchors):
                return root
            if root == current_dir and searched_child in dirs:
                # In-place removal stops os.walk from descending into it.
                dirs.remove(searched_child)

        parent_dir = os.path.dirname(current_dir)
        if parent_dir == current_dir:
            # The filesystem root has been searched too; nothing was found.
            return None

        searched_child = os.path.basename(current_dir)
        current_dir = parent_dir  # Move up one level


# Add pepx-database-interface to the path.
# The app root is located through its 'license-LJI.txt' anchor;
# find_root_dir() returns None when that anchor cannot be found.
APP_ROOT_DIR = find_root_dir(anchor_files=['license-LJI.txt'])
if APP_ROOT_DIR is None:
    # Do not format None into a path: that would put the meaningless entry
    # 'None/lib/pepx-database-interface' on sys.path. Warn and carry on, so
    # the import below still works if the package is importable some other way.
    PEPX_DB_INTF_PATH = None
    print(
        "warning: could not locate the app root (no 'license-LJI.txt' found); "
        "pepx-database-interface was not added to sys.path",
        file=sys.stderr,
    )
else:
    PEPX_DB_INTF_PATH = os.path.join(APP_ROOT_DIR, 'lib', 'pepx-database-interface')
    sys.path.append(PEPX_DB_INTF_PATH)

# This import must stay below the sys.path update above.
from pepx_database_interface.database_functions import Database  # noqa: E402

# Print wide DataFrames without wrapping and with (practically) all columns.
pd.set_option('expand_frame_repr', False)
pd.set_option('display.max_columns', 999)

# Credentials come from the environment; os.getenv() yields None for a
# variable that is not set, and that None is passed on to Database as-is.
pepx_pwd = os.getenv('PEPX_DB_PWD')
pepx_user = os.getenv('PEPX_DB_USER')
database = Database(password=pepx_pwd, user=pepx_user)

# Further example queries, kept for reference:
# data_sources = database.get_data_sources()
# print(data_sources)
# data_sources = database.get_data_sources('gene')
# print(data_sources)
# data_sources = database.get_data_sources('transcript')
# print(data_sources)

# datasets = database.get_datasets()
# print(datasets)
# datasets = database.get_datasets('Abelin', None)
# print(datasets)
# datasets = database.get_datasets(None, 'transcript')
# print(datasets)

# Query the datasets for 'HPA' / 'gene'. The result is decoded with
# json.loads, so get_datasets() is expected to return JSON text.
datasets = database.get_datasets('HPA', 'gene')
print("------")
datasets = json.loads(datasets)
# print(datasets)
# Print each dataset's title followed by the full decoded record.
for dataset in datasets:
    print(dataset['title'])
    print(dataset)