Glue jdbc job查不到表
# 在Job内创建临时表解决
from awsglue.context import GlueContext from pyspark.context import SparkContext sc = SparkContext() glueContext = GlueContext(sc) spark = glueContext.spark_session spark.read.format("jdbc") \ .option("url", "jdbc:redshift://redshift-cluster-1.###.us-east-1.redshift.amazonaws.com:5439/test") \ .option("driver", "com.amazon.redshift.jdbc.Driver") \ .option("dbtable", "s3_history_data") \ .option("user", "awsuser") \ .option("password", "") \ .load() \ .createOrReplaceTempView("test_public_s3_history_data") df = spark.sql(""" select * from test_public_s3_history_data; """) df.show()
直接下发sql
import sys import zipfile from awsglue.context import GlueContext from awsglue.job import Job from awsglue.utils import getResolvedOptions from pyspark.context import SparkContext args = getResolvedOptions(sys.argv, ['JOB_NAME']) sc = SparkContext() glueContext = GlueContext(sc) spark = glueContext.spark_session job = Job(glueContext) job.init(args['JOB_NAME'], args) zip_ref = zipfile.ZipFile('./psycopg2.zip', 'r') zip_ref.extractall('/tmp/packages') zip_ref.close() sys.path.insert(0, '/tmp/packages') import psycopg2 print("************py2,psycopg2,glue1,spark2.4********************") conn = psycopg2.connect( database="test", user="awsuser", password=" ", host="redshift-cluster.cgkrvmxgnzr9.us-west-2.redshift.amazonaws.com", port="5439" ) conn.autocommit = True cursor = conn.cursor() cursor.execute(""" DROP TABLE IF EXISTS baby_history_data; """) cursor.execute(""" CREATE TABLE "public"."baby_history_data" ( "user_id" int8, "birthday" int8, "gender" int8, "cat_id" int8, "buy_mount" int8, "day" int8 ); """) cursor.execute(""" INSERT INTO "public".baby_history_data SELECT s3_baby_data.user_id AS user_id, s3_baby_data.birthday AS birthday, s3_baby_data.gender AS gender, s3_history_data.cat_id AS cat_id, s3_history_data.buy_mount AS buy_mount FROM s3_baby_data JOIN s3_history_data ON s3_baby_data.user_id = s3_history_data.user_id; """) # conn.commit() cursor.close() conn.close() print("********************************") job.commit()
copy tablex from 's3:/bucket/xx.txt' credntials 'aws_iam_role=arn:aws:iam::/asdsad/xxrole' delimiter '\t' timeformat 'auto' region 'us-east-2';