import json
import random
import subprocess
import sys
import os

def evaluate_model(endpoint_name):
    """Run an fmeval factual-knowledge evaluation against an existing SageMaker endpoint."""
    # Clear the pip cache to ensure a clean installation. A glob such as
    # "~/.cache/pip/*" is not expanded without a shell, so remove the cache
    # directory itself instead.
    subprocess.check_call(["rm", "-rf", os.path.expanduser("~/.cache/pip")])

    # Install required packages
    subprocess.check_call([sys.executable, "-m", "pip", "install", "fmeval", "--upgrade-strategy", "only-if-needed", "--force-reinstall"])
    subprocess.check_call([sys.executable, "-m", "pip", "install", "jsonlines"])

    # Import the freshly installed libraries (deferred until after pip install)
    import fmeval
    import jsonlines
    import sagemaker
    import boto3
    from botocore.exceptions import ClientError

    # Set up AWS environment
    os.environ['AWS_DEFAULT_REGION'] = 'us-east-1'
    runtime = boto3.client("sagemaker-runtime")
    sagemaker_session = sagemaker.Session()

    # Set up SageMaker predictor for the specified endpoint
    predictor = sagemaker.predictor.Predictor(
        endpoint_name=endpoint_name,
        serializer=sagemaker.serializers.JSONSerializer(),
        deserializer=sagemaker.deserializers.JSONDeserializer()
    )

    # Function to test the endpoint with a sample prompt
    def test_endpoint(predictor):
        prompt = "Tell me about Amazon SageMaker"
        payload = {
            "inputs": prompt,
            "parameters": {
                "do_sample": True,
                "top_p": 0.9,
                "temperature": 0.8,
                "max_new_tokens": 100
            },
        }
        response = predictor.predict(payload)
        # The endpoint returns a list of generations, so index into the first
        # element; the corresponding JMESPath is "[0].generated_text".
        print(f'Query successful. \n\nExample: Prompt: {prompt} ... Model response: {response[0]["generated_text"]}')
        output_format = '[0].generated_text'
        return output_format

    output_format = test_endpoint(predictor)

    # Invoke the endpoint through the low-level SageMaker runtime client with a JSON payload
    prompt = "Tell me about Amazon SageMaker."
    payload = {
        "inputs": prompt,
        "parameters": {
            "do_sample": True,
            "top_p": 0.9,
            "temperature": 0.8,
            "max_new_tokens": 100,
        },
    }
    content_type = "application/json"
    
    try:
        # Try to invoke the existing endpoint
        print(f"Utilizing invoke_endpoint API call for existing endpoint: {endpoint_name}")
        response = runtime.invoke_endpoint(EndpointName=endpoint_name, Body=json.dumps(payload), ContentType=content_type)
        result = json.loads(response['Body'].read().decode())
        print(result[0]['generated_text'])
    
    except ClientError:
        # If the direct call fails, fall back to the predictor created earlier
        response = predictor.predict(payload)
        print(response[0]["generated_text"])

    # Create an evaluation dataset in JSONL format with capital cities and their regions
    capitals = [
        ("Aurillac", "Cantal"),
        ("Bamiyan", "Bamiyan Province"),
        ("Sokhumi", "Abkhazia"),
        ("Bukavu", "South Kivu"),
        ("Senftenberg", "Oberspreewald-Lausitz"),
        ("Legazpi City", "Albay"),
        ("Sukhum", "Abkhazia"),
        ("Paris", "France"),
        ("Berlin", "Germany"),
        ("Tokyo", "Japan"),
        ("Moscow", "Russia"),
        ("Madrid", "Spain"),
        ("Rome", "Italy"),
        ("Beijing", "China"),
        ("London", "United Kingdom"),
    ]

    # Function to generate a single entry for the dataset
    def generate_entry():
        city, region = random.choice(capitals)
        if random.random() < 0.2:
            alternatives = [f"{region} Province", f"{region} province", region]
            answers = f"{region}<OR>" + "<OR>".join(random.sample(alternatives, k=random.randint(1, len(alternatives))))
        else:
            answers = region
        return {
            "answers": answers,
            "knowledge_category": "Capitals",
            "question": f"{city} is the capital of"
        }

    # Generate the dataset
    num_entries = 15
    dataset = [generate_entry() for _ in range(num_entries)]
    input_file = "capitals_dataset.jsonl"
    with open(input_file, "w") as f:
        for entry in dataset:
            f.write(json.dumps(entry) + "\n")
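
    # Quick sanity check (illustrative): print one generated record so the JSONL
    # shape is visible before running inference, e.g.
    # {"answers": "Cantal", "knowledge_category": "Capitals", "question": "Aurillac is the capital of"}
    print(f"Sample dataset entry: {json.dumps(dataset[0])}")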

    # Function to create the inference payload for a prompt; avoid a mutable
    # default argument by building the parameter dict inside the function
    def create_payload(prompt, parameters=None):
        if not prompt:
            raise ValueError("Please provide a non-empty prompt.")
        if parameters is None:
            parameters = {"do_sample": True, "top_p": 0.9, "temperature": 0.8, "max_new_tokens": 32}
        return {"inputs": prompt, "parameters": parameters}

    # Create a new dataset file that contains the same columns as the original dataset, along with a new column for model output
    output_file = "capitals_dataset_with_model_outputs.jsonl"

    with jsonlines.open(input_file) as input_fh, jsonlines.open(output_file, "w") as output_fh:
        for line in input_fh:
            if "question" in line:
                question = line["question"]
                payload = create_payload(question)
                response = runtime.invoke_endpoint(EndpointName=endpoint_name, Body=json.dumps(payload), ContentType=content_type)
                result = json.loads(response['Body'].read().decode())
                model_output = result[0]['generated_text']
                line["model_output"] = model_output
                output_fh.write(line)
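
    # Each output record now carries the model's continuation alongside the
    # reference answers, e.g. (illustrative; actual text varies per model):
    # {"answers": "Cantal", "knowledge_category": "Capitals",
    #  "question": "Aurillac is the capital of", "model_output": " the Cantal department in France."}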

    # Set up and run evaluation using fmeval Factual Knowledge evaluation
    content_template = '{"inputs": $prompt, "parameters": {"do_sample": true, "top_p": 0.9, "temperature": 0.8, "max_new_tokens": 32}}'

    from fmeval.data_loaders.data_config import DataConfig
    from fmeval.constants import MIME_TYPE_JSONLINES
    from fmeval.eval_algorithms.factual_knowledge import FactualKnowledge, FactualKnowledgeConfig
    from fmeval.model_runners.sm_model_runner import SageMakerModelRunner

    # Set up the SageMaker model runner; the output JMESPath matches the
    # list-shaped response verified in test_endpoint above
    model_runner = SageMakerModelRunner(
        endpoint_name=endpoint_name,
        content_template=content_template,
        output=output_format
    )

    # Configure the dataset for evaluation
    config = DataConfig(
        dataset_name="capitals_dataset_with_model_outputs",
        dataset_uri=output_file,
        dataset_mime_type=MIME_TYPE_JSONLINES,
        model_input_location="question",
        target_output_location="answers",
        model_output_location="model_output"
    )

    # Set up and run the factual knowledge evaluation
    eval_algo = FactualKnowledge(FactualKnowledgeConfig(target_output_delimiter="<OR>"))
    eval_output = eval_algo.evaluate(model=model_runner, dataset_config=config, prompt_template="$model_input", save=True)

    # Print the evaluation results
    print(json.dumps(eval_output, default=vars, indent=4))
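
    # Optional: inspect the per-record results that evaluate(save=True) wrote to
    # disk. The default directory and file name below are assumptions; fmeval
    # lets you override the directory via the EVAL_RESULTS_PATH environment variable.
    results_path = os.path.join(
        os.environ.get("EVAL_RESULTS_PATH", "/tmp/eval_results"),
        "factual_knowledge_capitals_dataset_with_model_outputs.jsonl",  # assumed file name
    )
    if os.path.exists(results_path):
        with jsonlines.open(results_path) as records:
            for record in records:
                print(json.dumps(record, indent=2))


# Minimal invocation sketch: "my-llm-endpoint" is a placeholder; pass the name
# of an endpoint already deployed in your account and region.
if __name__ == "__main__":
    evaluate_model(sys.argv[1] if len(sys.argv) > 1 else "my-llm-endpoint")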