(一) sqoop从mysql导入到hive

【sqoop官方文档】 http://sqoop.apache.org/docs/1.4.6/SqoopUserGuide.html#_incremental_imports

--1、 mysql建表
drop table cust_info;
create table cust_info(
`id` bigint comment '编号',
`telephone` VARCHAR(20) comment '电话',
`tname` VARCHAR(50) comment '姓名',
`create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '创建时间',
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '更新时间'
)comment '人员信息' ;

alter table cust_info change tname tname varchar(255) character set utf8;

--备注:mysql 中文字符 设置为 utf8
--INSERT INTO cust_info VALUES (1,'13511001','张三',current_timestamp(),current_timestamp());
--INSERT INTO cust_info VALUES (2,'13611001','李四',current_timestamp(),current_timestamp());

INSERT INTO cust_info VALUES (1,'13511001','张三',STR_TO_DATE('2020-08-07 16:01:45', '%Y-%m-%d %H:%i:%s'),STR_TO_DATE('2020-08-07 16:01:45', '%Y-%m-%d %H:%i:%s'));
INSERT INTO cust_info VALUES (2,'13611001','李四',STR_TO_DATE('2020-08-08 16:01:45', '%Y-%m-%d %H:%i:%s'),STR_TO_DATE('2020-08-08 16:01:45', '%Y-%m-%d %H:%i:%s'));

 

-- hive导出表

drop table hive_cust_info;
create table hive_cust_info(
`id` bigint comment '编号', 
`telephone` VARCHAR(20) comment '电话', 
`tname` VARCHAR(50) comment '姓名', 
`create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '创建时间', 
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '更新时间'
)comment '人员信息' ;

alter table hive_cust_info change tname tname varchar(255) character set utf8;

 

SELECT * FROM cust_info ;

--2、rsc 层建表语句
drop table rsc.cust_info;
create table rsc.cust_info(
`id` bigint comment '编号',
`telephone` string comment '电话',
`tname` string comment '姓名',
`create_time` timestamp comment '创建时间',
`update_time` timestamp comment '更新时间'
)comment '人员信息'
PARTITIONED BY(etl_dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\u0001'
STORED AS TEXTFILE;

 --备注 rsc 一般为分表以拉链表方式保留历史

--rsc建表格式为TEXTFILE、字段分隔符和mysql端保持一致为【\u0001】

--rdw层建表

create table rdw.t01_cust(
`id` bigint comment '编号',
`telephone` string comment '电话',
`tname` string comment '姓名',
`create_time` timestamp comment '创建时间',
`update_time` timestamp comment '更新时间'
)comment '人员信息'
PARTITIONED BY(etl_dt string)
STORED AS ORC;

 

-- sqoop 任务1 (mysql导出到hive)

sqoop import  --connect jdbc:mysql://192.168.91.112:3306/bgdmysqldb  --username root  --password '2019_Mysql'  --table cust_info --columns "id,telephone,tname,create_time,update_time" --where " 1=1 and  (create_time>=concat('2020-08-07',' 00:00:00') and create_time<=concat('2020-08-07',' 23:59:59')) OR (update_time>=concat('2020-08-07',' 00:00:00') and update_time<=concat('2020-08-07',' 23:59:59')) "  --fields-terminated-by '\001'  --num-mappers 1  --hive-import  --hive-database rsc  --hive-table cust_info    --delete-target-dir  --hive-partition-key etl_dt  --hive-partition-value 2020-08-04 --hive-drop-import-delims  --null-string 'N'  --null-non-string '0'

 

--以下是换行版本(功能和以上相同,“--” 之间用 空格分开)

sqoop import \
--connect jdbc:mysql://192.168.91.112:3306/bgdmysqldb\
--username root\
--password '2019_Mysql'\
--table cust_info\
--columns "id,telephone,tname,create_time,update_time"\
--where " 1=1 and (create_time>=concat('2020-08-07',' 00:00:00') and create_time<=concat('2020-08-07',' 23:59:59')) OR (update_time>=concat('2020-08-07',' 00:00:00') and update_time<=concat('2020-08-07',' 23:59:59')) "\
--fields-terminated-by '\001'\
--num-mappers 1\
--hive-import\
--hive-database rsc\
--hive-table cust_info\
--delete-target-dir\
--hive-partition-key etl_dt\
--hive-partition-value 2020-08-04\
--hive-drop-import-delims\
--null-string 'N'\
--null-non-string '0'

----------------------------------------------------------------------------------------

 

 

 

-- 备注:sqoop 导入后字段分隔符--fields-terminated-by '\001'  与hive的默认字段分隔符 '\001' 保持一致。

 

--sqoop任务2(从hive导出到mysql)

sqoop export --connect 'jdbc:mysql://192.168.91.112:3306/bgdmysqldb?useUnicode=true&characterEncoding=utf-8'  --username root  --password '2019_Mysql' --table hive_cust_info --export-dir /user/hive/warehouse/rsc.db/cust_info/etl_dt=2020-08-04 --input-fields-terminated-by '\001'

-- 备注1 “?useUnicode=true&characterEncoding=utf-8” 防止导出乱码

--备注2 input-fields-terminated-by '\001' 使用正确分隔符。

-- 参考:

 

sqoop import --connect jdbc:mysql://10.10.10.10:7185/jssclub?tinyInt1isBit=false
--username BI
--password xxxxxxxxx
--table video
--columns "id,title,video_img,video_link,lecturer_name,avatar,lecturer_introduce,description,video_type,sort,video_status,create_time,update_time,one_type,video_duration,video_img_clear,entry_video_type,train_type"
--where "1=1" -m 1
--class-name rsc_jssclub_video
--delete-target-dir
--target-dir /tmp/rsc/jssclub_video_2020-08-04
--hive-import
--hive-drop-import-delims
--hive-database rsc
--hive-table jssclub_video
--hive-partition-key etl_dt
--hive-partition-value 2020-08-04
--fields-terminated-by '\001'
--null-string '\\N'
--null-non-string '0'
--input-null-string \\N
--input-null-non-string "\\N"

 

参考(二)hive中的分隔符:

https://blog.csdn.net/qq_26442553/article/details/80297028?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-3.channel_param&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-3.channel_param

 

posted @ 2020-08-07 16:36  bjxdd  阅读(227)  评论(0)    收藏  举报