#! /usr/bin/python

def _is_strict_int(value):
    """Return True for real integers, rejecting bool (a subclass of int)."""
    return isinstance(value, int) and not isinstance(value, bool)


def mutgen_validate(inputs):
    """Validate the parameter dict and fill in defaults.

    Keys read from ``inputs``:

    * ``input_vcf`` (required): non-empty string, path of a VCF file that
      vcfpy can open.
    * ``annotate`` (default ``False``): must be truthy when the VCF header has
      no ``INFO``/``ANN`` line.
    * ``peptide_length`` (default 21): integer in 9-40.
    * ``peptide_mutation_position`` (optional): comma-separated string of
      integers, each in 1-``peptide_length``. When given (and non-empty),
      ``peptide_mutation_position1``/``2`` are neither validated nor written
      back.
    * ``peptide_mutation_position1`` (default ``peptide_length // 2``):
      integer in 1-``peptide_length``.
    * ``peptide_mutation_position2`` (default ``None``): ``None`` or an
      integer in 1-``peptide_length``.
    * ``frameshift_overlap`` (default 9): integer in 1-20.
    * ``maximum_peptide_length`` (default ``peptide_length + 3``): integer in
      ``peptide_length + 1`` to ``peptide_length + 10``.
    * ``reference_genome`` (required): non-empty string.

    Booleans are not accepted where an integer is required.

    Returns:
        An ``(inputs, errors)`` tuple. ``errors`` is a list of messages, empty
        when everything is valid. ``inputs`` is the same dict object that was
        passed in; when there are no errors it is updated in place with the
        validated (possibly defaulted) values. Problems with the VCF file or
        with ``peptide_length`` return immediately, because later checks
        depend on them.
    """
    errors = []

    annotate = inputs.get("annotate", False)

    # input_vcf must be a non-empty string holding the path of the VCF file.
    input_vcf = inputs.get("input_vcf")
    if not isinstance(input_vcf, str) or not input_vcf:
        errors.append("input_vcf parameter must be provided for path of a valid VCF file.")
        return inputs, errors

    # Imported here, after the cheap check above, so vcfpy is only needed once
    # a path has actually been supplied.
    import vcfpy
    try:
        reader = vcfpy.Reader.from_path(input_vcf)
    except Exception as e:
        # Any failure to open/parse the file is reported as a validation error.
        errors.append(f"VCF file error: {e}.")
        return inputs, errors
    try:
        has_ann_header = reader.header.has_header_line('INFO', 'ANN')
    finally:
        # Only the header is needed; release the underlying file handle.
        reader.close()
    if not has_ann_header and not annotate:
        errors.append("This VCF does not appear to be annotated with SnpEff. Please add 'annotate': true to the input parameters. See https://nextgen-tools.iedb.org/docs/tools/mpg/index.html#vcf-input-files for more information.")
        return inputs, errors

    # Validate peptide_length (should be an integer in the range 9-40).
    # Later ranges are expressed relative to it, so stop here when it is bad.
    peptide_length = inputs.get("peptide_length", 21)
    if not _is_strict_int(peptide_length) or not (9 <= peptide_length <= 40):
        errors.append("peptide_length must be an integer in the range 9-40.")
        return inputs, errors

    # Values to write back into `inputs` once everything validated. Only keys
    # that were actually checked on this call are added.
    validated = {"peptide_length": peptide_length}

    # Validate peptide_mutation_position first (comma-separated, each part an
    # integer in the range 1-peptide_length).
    peptide_mutation_position = inputs.get("peptide_mutation_position", None)
    if peptide_mutation_position:
        def is_valid_position(part):
            # isascii() guards against digit-like characters (e.g. "²") that
            # pass isdigit() but make int() raise.
            return part.isascii() and part.isdigit() and 1 <= int(part) <= peptide_length

        if not isinstance(peptide_mutation_position, str) or not all(
            is_valid_position(part) for part in peptide_mutation_position.split(",")
        ):
            errors.append(f"peptide_mutation_position must be numbers separated by comma and each of them should be in the range 1-{peptide_length}.")
    # Only validate position1/position2 if peptide_mutation_position is not given.
    else:
        # Validate peptide_mutation_position1 (integer in the range 1-peptide_length).
        peptide_mutation_position1 = inputs.get("peptide_mutation_position1", peptide_length // 2)
        if not _is_strict_int(peptide_mutation_position1) or not (1 <= peptide_mutation_position1 <= peptide_length):
            errors.append(f"peptide_mutation_position1 must be an integer in the range 1-{peptide_length}.")

        # Validate peptide_mutation_position2 (None, or an integer in the range 1-peptide_length).
        peptide_mutation_position2 = inputs.get("peptide_mutation_position2", None)
        if peptide_mutation_position2 is not None:
            if not _is_strict_int(peptide_mutation_position2) or not (1 <= peptide_mutation_position2 <= peptide_length):
                errors.append(f"peptide_mutation_position2 must be an integer in the range 1-{peptide_length} or None.")

        validated["peptide_mutation_position1"] = peptide_mutation_position1
        validated["peptide_mutation_position2"] = peptide_mutation_position2

    # Validate frameshift_overlap (should be an integer in the range 1-20).
    frameshift_overlap = inputs.get("frameshift_overlap", 9)
    if not _is_strict_int(frameshift_overlap) or not (1 <= frameshift_overlap <= 20):
        errors.append("frameshift_overlap must be an integer in the range 1-20.")
    validated["frameshift_overlap"] = frameshift_overlap

    # Validate maximum_peptide_length (integer in the range peptide_length+1 to peptide_length+10).
    maximum_peptide_length = inputs.get("maximum_peptide_length", peptide_length + 3)
    if not _is_strict_int(maximum_peptide_length) or not (peptide_length + 1 <= maximum_peptide_length <= peptide_length + 10):
        errors.append(f"maximum_peptide_length must be an integer in the range {peptide_length + 1}-{peptide_length + 10}.")
    validated["maximum_peptide_length"] = maximum_peptide_length

    # Validate reference_genome (should be a non-empty string).
    reference_genome = inputs.get("reference_genome")
    if not isinstance(reference_genome, str) or not reference_genome:
        errors.append("reference_genome must be a non-empty string.")

    if not errors:
        inputs.update(validated)

    return inputs, errors

# Example usage. Two alternative ways of giving the mutation positions are
# shown: separate position1/position2 integers, or one comma-separated string.
# Several sample values (e.g. peptide_length 120) lie outside the ranges
# accepted by mutgen_validate, so the demo is expected to report errors.
example_inputs_separate_positions = {
  "input_vcf": "examples/syntheticX.speedseq.ann.vcf",
  "peptide_length": 120,
  "peptide_mutation_position1": 16,
  "peptide_mutation_position2": 115,
  "frameshift_overlap": 9,
  "min_length_near_start_stop": 14,
  "maximum_peptide_length": 22,
  "reference_genome": "GRCh38"
}
# The comma-separated form is the one the demo below validates.
inputs = {
  "input_vcf": "examples/syntheticX.speedseq.ann.vcf",
  "peptide_length": 120,
  "peptide_mutation_position": "16,115",
  "frameshift_overlap": 9,
  "min_length_near_start_stop": 14,
  "maximum_peptide_length": 22,
  "reference_genome": "GRCh38"
}
if __name__ == "__main__":
    # mutgen_validate returns (inputs, errors); only the error list decides
    # which message is printed.
    _, validation_errors = mutgen_validate(inputs)
    if validation_errors:
        print("Validation errors:")
        for error in validation_errors:
            print(f"- {error}")
    else:
        print("All inputs are valid.")

