# Copyright 2022, Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Amazon Software License (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#   http://aws.amazon.com/asl/
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.


# Sample script to illustrate AWS IAM/Amazon S3 permissions for a database and table in the Data Catalog.
# Glue job that reads a table from the Data Catalog and displays the first 20 rows.
# It then counts the number of rows and columns and prints the totals.
# Provide the catalog ID where the database and table reside.
 


import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job

# Resolve the required job arguments: the Glue job name and the AWS account
# (catalog) ID that owns the Data Catalog database/table being read.
job_args = getResolvedOptions(sys.argv, ['JOB_NAME', 'catalog-id'])
catalog_id_value = job_args['catalog-id']

# Bootstrap the Spark/Glue runtime and initialize the job for bookmarking.
spark_context = SparkContext()
glue_context = GlueContext(spark_context)
spark = glue_context.spark_session
glue_job = Job(glue_context)
glue_job.init(job_args['JOB_NAME'], job_args)

# Read the table from the Data Catalog in the given account; this is where
# cross-account IAM / Lake Formation permissions are exercised.
dynamic_frame = glue_context.create_dynamic_frame.from_catalog(
    catalog_id=catalog_id_value,
    database='hybridsalesdb',
    table_name='hybridsalesproduct',
)
dynamic_frame.printSchema()

# Convert to a Spark DataFrame for display and simple aggregate counts.
data_frame = dynamic_frame.toDF()
data_frame.show(20)

record_count = data_frame.count()
print(f"Total records in the table : {record_count}")

column_count = len(data_frame.columns)
print(f"Total columns in the table : {column_count}")

# Signal successful completion so job bookmarks are committed.
glue_job.commit()
