Glue jdbc job查不到表

# 在Job内创建临时表解决
from
awsglue.context import GlueContext from pyspark.context import SparkContext sc = SparkContext() glueContext = GlueContext(sc) spark = glueContext.spark_session spark.read.format("jdbc") \ .option("url", "jdbc:redshift://redshift-cluster-1.###.us-east-1.redshift.amazonaws.com:5439/test") \ .option("driver", "com.amazon.redshift.jdbc.Driver") \ .option("dbtable", "s3_history_data") \ .option("user", "awsuser") \ .option("password", "") \ .load() \ .createOrReplaceTempView("test_public_s3_history_data") df = spark.sql(""" select * from test_public_s3_history_data; """) df.show()

 

 

直接下发sql

import sys
import zipfile

from awsglue.context import GlueContext
from awsglue.job import Job
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext

args = getResolvedOptions(sys.argv, ['JOB_NAME'])
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args['JOB_NAME'], args)

zip_ref = zipfile.ZipFile('./psycopg2.zip', 'r')
zip_ref.extractall('/tmp/packages')
zip_ref.close()
sys.path.insert(0, '/tmp/packages')

import psycopg2

print("************py2,psycopg2,glue1,spark2.4********************")

conn = psycopg2.connect(
    database="test", user="awsuser", password=" ",
    host="redshift-cluster.cgkrvmxgnzr9.us-west-2.redshift.amazonaws.com", port="5439"
)
conn.autocommit = True
cursor = conn.cursor()
cursor.execute("""
DROP TABLE
IF
    EXISTS  baby_history_data;
""")
cursor.execute("""
CREATE TABLE "public"."baby_history_data" (
    "user_id" int8,
    "birthday" int8,
    "gender" int8,
    "cat_id" int8,
    "buy_mount" int8,
"day" int8 
);
""")

cursor.execute("""
INSERT INTO "public".baby_history_data   SELECT
    s3_baby_data.user_id AS user_id,
    s3_baby_data.birthday AS birthday,
    s3_baby_data.gender AS gender,
    s3_history_data.cat_id AS cat_id,
    s3_history_data.buy_mount AS buy_mount 
FROM
    s3_baby_data
    JOIN s3_history_data ON s3_baby_data.user_id = s3_history_data.user_id;
""")
# conn.commit()
cursor.close()
conn.close()

print("********************************")
job.commit()

 

copy tablex from 's3:/bucket/xx.txt' 
credntials 'aws_iam_role=arn:aws:iam::/asdsad/xxrole'
delimiter '\t' timeformat 'auto' region 'us-east-2';

 

posted @ 2020-10-16 13:05  慕沁  阅读(117)  评论(0)    收藏  举报