Using Athena Views As A Source In Glue

An error occurred while calling o73.getCatalogSource. No classification or connection in mydatabase.v_my_view

Drivers

IAM

{
"Sid": "VisualEditor1",
"Effect": "Allow",
"Action": [
"logs:CreateLogStream",
"logs:AssociateKmsKey",
"athena:*",
"logs:CreateLogGroup",
"logs:PutLogEvents"
],
"Resource": [
"arn:aws:athena:*:youraccount:workgroup/*",
"arn:aws:athena:*:youracccont:datacatalog/*",
"arn:aws:logs:*:*:/aws-glue/*"
]
}

Create Glue ETL Job

import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglue.dynamicframe import DynamicFrame

args = getResolvedOptions(sys.argv, ["JOB_NAME"])
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args["JOB_NAME"], args)

athena_view_dataframe = (
glueContext.read.format("jdbc")
.option("driver", "com.simba.athena.jdbc.Driver")
.option("AwsCredentialsProviderClass","com.simba.athena.amazonaws.auth.InstanceProfileCredentialsProvider")
.option("url", "jdbc:awsathena://athena.eu-west-1.amazonaws.com:443")
.option("dbtable", "AwsDataCatalog.yourathenadatabase.yourathenaview")
.option("S3OutputLocation","s3://yours3bucket/temp")
.load()
)

athena_view_dataframe.printSchema()
.option("driver", "com.simba.athena.jdbc.Driver")
.option("AwsCredentialsProviderClass","com.simba.athena.amazonaws.auth.InstanceProfileCredentialsProvider")
.option("url", "jdbc:awsathena://athena.eu-west-1.amazonaws.com:443")
.option("dbtable", "AwsDataCatalog.yourathenadatabase.yourathenaview")
SELECT * FROM "AwsDataCatalog"."vehicles"."v_electric_cars";
.option("dbtable", "AwsDataCatalog.vehicles.v_electric_cars")
--extra-jars s3://yours3bucket/jdbc-drivers/AthenaJDBC42_2.0.27.1000.jar
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglue.dynamicframe import DynamicFrame

args = getResolvedOptions(sys.argv, ["JOB_NAME"])
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args["JOB_NAME"], args)

athena_view_dataframe = (
glueContext.read.format("jdbc")
.option("driver", "com.simba.athena.jdbc.Driver")
.option("AwsCredentialsProviderClass","com.simba.athena.amazonaws.auth.InstanceProfileCredentialsProvider")
.option("url", "jdbc:awsathena://athena.eu-west-1.amazonaws.com:443")
.option("dbtable", "AwsDataCatalog.vehicles.v_electric_cars")
.option("S3OutputLocation","s3://yours3bucket/temp")
.load()
)

athena_view_dataframe.printSchema()

athena_view_datasource = DynamicFrame.fromDF(athena_view_dataframe, glueContext, "athena_view_source")

pq_output = glueContext.write_dynamic_frame.from_options(
frame=athena_view_datasource,
connection_type="s3",
format="glueparquet",
connection_options={
"path": "s3://yourotherS3Bucket/",
"partitionKeys": [],
},
format_options={"compression": "snappy"},
transformation_ctx="ParquetConversion",
)

job.commit()

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store