# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

#!/usr/bin/python

import boto3
import json
import sys
import textwrap

#Identifiers of the Amazon Kendra index to query and of the AWS region used to
#create the client. Replace both placeholders before running the script.
indexid = "REPLACE-WITH-YOUR-AMAZON-KENDRA-INDEX-ID"
region = "REPLACE-WITH-YOUR-REGION"

#The underlying data in this case is related to AWS Storage, Databases and Compute
#technologies, which have data encryption as a common topic, so the query
#"How to encrypt data?" is relevant. When working with your own data, choose a
#query that suits that data.
kquery = "How to encrypt data?"

#Value passed as MaxResults on the nested (child) facets defined further down, to
#show only the top MaxResults entries of a facet. In this example Technology and
#Document_Type have only three values each, so the limit has little effect; it
#becomes useful when a facet has many values.
max_results = 5

#Initialize the Kendra client used for all API calls in this script
kclient = boto3.client('kendra', region_name=region)

#A helper that turns one facet value into printable text
def _facet_value_text(value):
    """Return printable text for a facet's ``DocumentAttributeValue`` dict.

    ``StringValue`` is returned unchanged. Other value kinds (string list,
    long, date) are converted to text so that facets on non-string
    attributes do not raise ``KeyError``. An unrecognised dict falls back to
    its ``str()`` form rather than failing.
    """
    if "StringValue" in value:
        return value["StringValue"]
    if "StringListValue" in value:
        return ", ".join(value["StringListValue"])
    for field in ("LongValue", "DateValue"):
        if field in value:
            return str(value[field])
    return str(value)

#A routine to print the hierarchical facets to help users guide their next search
def print_facet_results(fr, indent=1):
    """Print facet results as an indented tree of ``value:count`` lines.

    Args:
        fr: Iterable of facet-result dicts, each holding a
            "DocumentAttributeKey" and a list of
            "DocumentAttributeValueCountPairs".
        indent: Nesting level of the facet key; each level is two spaces.

    Each facet key is printed at ``indent`` and its values one level deeper.
    When a value/count pair carries nested "FacetResults", they are printed
    recursively two levels deeper, i.e. one level below the value they
    belong to.
    """
    step = ' ' * 2
    for item in fr:
        print(step * indent + item["DocumentAttributeKey"])
        for pair in item["DocumentAttributeValueCountPairs"]:
            label = _facet_value_text(pair["DocumentAttributeValue"])
            print(step * (indent + 1) + label + ':' + str(pair["Count"]))
            #Child facets are only present for hierarchical facet requests
            if "FacetResults" in pair:
                print_facet_results(pair["FacetResults"], indent + 2)

#A routine to print the first search result from the response
def print_first_result(resp):
    """Print title, selected attributes and excerpt of the first result.

    Args:
        resp: Query response dict. Its "ResultItems" list supplies the
            result to print.

    Only the "Document_Type" and "Technology" attributes are shown. The
    excerpt is wrapped to 70 columns with a two-space indent. When the
    response has no result items (for example because an attribute filter
    matched nothing), a short notice is printed instead of raising
    ``IndexError``.
    """
    items = resp.get("ResultItems") or []
    if not items:
        print("No results found.")
        return
    r = items[0]
    print("Document Title: ", r["DocumentTitle"]["Text"])
    print("Document Attributes:")
    for attr in r["DocumentAttributes"]:
        if attr["Key"] in ("Document_Type", "Technology"):
            print("  %s: %s" % (attr["Key"], attr["Value"]["StringValue"]))
    print("Document Excerpt:")
    print(textwrap.fill(r["DocumentExcerpt"]["Text"], width=70,
                        initial_indent='  ', subsequent_indent='  '))

#A routine to pretty print query response
def print_response(response, filter_str=''):
    """Print a summary of one query response.

    Args:
        response: Query response dict; "TotalNumberOfResults" and
            "FacetResults" are read here, and the dict is handed to
            print_first_result for the first result item.
        filter_str: Optional description of the attribute filter that was
            applied; printed only when it is not the empty string.

    The query text shown is the module-level ``kquery``.
    """
    rule_width = 70
    print("Query: ", kquery)
    has_filter = filter_str != ''
    if has_filter:
        print("Query Filter: %s" % filter_str)
    total_hits = response["TotalNumberOfResults"]
    print("Number of results:", total_hits)
    print_first_result(response)
    #Thin rule between the first result and the facets, thick rule at the end
    print("-" * rule_width)
    print("Facets:")
    print_facet_results(response["FacetResults"])
    print("=" * rule_width)

#Flat facets: Technology and Document_Type are requested side by side,
#with no parent/child relationship between them
fac0 = [
    {"DocumentAttributeKey": attribute_key}
    for attribute_key in ("Technology", "Document_Type")
]

#Run the query with the flat facets and print the response
print_response(
    kclient.query(
        IndexId=indexid,
        QueryText=kquery,
        Facets=fac0,
    )
)

#str_filter0 is a human-readable description of att_filter0, used only for
#printing. Keep the two in sync when editing either one.
str_filter0 = "Technology: Storage AND Document_Type: User_Guides"
att_filter0 = {
    "AndAllFilters": [
        {"EqualsTo": {"Key": attr_key, "Value": {"StringValue": attr_value}}}
        for attr_key, attr_value in (
            ("Technology", "Storage"),
            ("Document_Type", "User_Guides"),
        )
    ]
}

#Based on the facets, repeat the flat-facet query restricted to
#Technology:Storage and Document_Type:User_Guides
print_response(
    kclient.query(
        IndexId=indexid,
        QueryText=kquery,
        Facets=fac0,
        AttributeFilter=att_filter0,
    ),
    filter_str=str_filter0,
)

#Hierarchical facets: documents are grouped by Technology first, and within
#each technology by Document_Type
fac1 = [
    {
        "DocumentAttributeKey": "Technology",
        "Facets": [
            {"DocumentAttributeKey": "Document_Type", "MaxResults": max_results},
        ],
    },
]

#Run the query with Technology as the parent facet and Document_Type as its child
print_response(
    kclient.query(
        IndexId=indexid,
        QueryText=kquery,
        Facets=fac1,
    )
)

#str_filter1 is a human-readable description of att_filter1, used only for
#printing. Keep the two in sync when editing either one.
str_filter1 = "Technology: Databases AND Document_Type: Reference_Guides"
att_filter1 = {
    "AndAllFilters": [
        {"EqualsTo": {"Key": attr_key, "Value": {"StringValue": attr_value}}}
        for attr_key, attr_value in (
            ("Technology", "Databases"),
            ("Document_Type", "Reference_Guides"),
        )
    ]
}

#Based on the facets, repeat the Technology > Document_Type query restricted to
#Technology:Databases and Document_Type:Reference_Guides
print_response(
    kclient.query(
        IndexId=indexid,
        QueryText=kquery,
        Facets=fac1,
        AttributeFilter=att_filter1,
    ),
    filter_str=str_filter1,
)

#Hierarchical facets, reversed: documents are grouped by Document_Type first,
#and within each document type by Technology
fac2 = [
    {
        "DocumentAttributeKey": "Document_Type",
        "Facets": [
            {"DocumentAttributeKey": "Technology", "MaxResults": max_results},
        ],
    },
]

#Run the query with Document_Type as the parent facet and Technology as its child
print_response(
    kclient.query(
        IndexId=indexid,
        QueryText=kquery,
        Facets=fac2,
    )
)

#str_filter2 is a human-readable description of att_filter2, used only for
#printing. Keep the two in sync when editing either one.
str_filter2 = "Document_Type: Reference_Guides AND Technology:Compute"
att_filter2 = {
    "AndAllFilters": [
        {"EqualsTo": {"Key": attr_key, "Value": {"StringValue": attr_value}}}
        for attr_key, attr_value in (
            ("Document_Type", "Reference_Guides"),
            ("Technology", "Compute"),
        )
    ]
}

#Based on the facets, repeat the Document_Type > Technology query restricted to
#Document_Type:Reference_Guides and Technology:Compute
print_response(
    kclient.query(
        IndexId=indexid,
        QueryText=kquery,
        Facets=fac2,
        AttributeFilter=att_filter2,
    ),
    filter_str=str_filter2,
)
