Glue jdbc job查不到表

# 在Job内创建临时表解决
from awsglue.context import GlueContext
from pyspark.context import SparkContext

sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session

spark.read.format("jdbc") \
    .option("url", "jdbc:redshift://redshift-cluster-1.###.us-east-1.redshift.amazonaws.com:5439/test") \
    .option("driver", "com.amazon.redshift.jdbc.Driver") \
    .option("dbtable", "s3_history_data") \
    .option("user", "awsuser") \
    .option("password", "") \
    .load() \
    .createOrReplaceTempView("test_public_s3_history_data")

df = spark.sql("""
select * from test_public_s3_history_data;
""")
df.show()

直接下发sql

import sys
import zipfile

from awsglue.context import GlueContext
from awsglue.job import Job
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext

args = getResolvedOptions(sys.argv, ['JOB_NAME'])
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args['JOB_NAME'], args)

zip_ref = zipfile.ZipFile('./psycopg2.zip', 'r')
zip_ref.extractall('/tmp/packages')
zip_ref.close()
sys.path.insert(0, '/tmp/packages')

import psycopg2

print("************py2,psycopg2,glue1,spark2.4********************")

conn = psycopg2.connect(
    database="test", user="awsuser", password=" ",
    host="redshift-cluster.cgkrvmxgnzr9.us-west-2.redshift.amazonaws.com", port="5439"
)
conn.autocommit = True
cursor = conn.cursor()
cursor.execute("""
DROP TABLE
IF
    EXISTS  baby_history_data;
""")
cursor.execute("""
CREATE TABLE "public"."baby_history_data" (
    "user_id" int8,
    "birthday" int8,
    "gender" int8,
    "cat_id" int8,
    "buy_mount" int8,
"day" int8 
);
""")

cursor.execute("""
INSERT INTO "public".baby_history_data   SELECT
    s3_baby_data.user_id AS user_id,
    s3_baby_data.birthday AS birthday,
    s3_baby_data.gender AS gender,
    s3_history_data.cat_id AS cat_id,
    s3_history_data.buy_mount AS buy_mount 
FROM
    s3_baby_data
    JOIN s3_history_data ON s3_baby_data.user_id = s3_history_data.user_id;
""")
# conn.commit()
cursor.close()
conn.close()

print("********************************")
job.commit()

copy tablex from 's3:/bucket/xx.txt' 
credntials 'aws_iam_role=arn:aws:iam::/asdsad/xxrole'
delimiter '\t' timeformat 'auto' region 'us-east-2';

posted @ 2020-10-16 13:05 慕沁阅读(131) 评论(0) 收藏举报

刷新页面返回顶部

慕沁

Glue jdbc job查不到表

公告