【大数据基本】四、sqoop
CREATE TABLE ods_stat_hospital
(
id bigint,
hospital_code string comment '医院编码',
primary_id string comment 'his的主键id',
dept_name string,
stat_date string comment '统计日期',
channel_type string comment '渠道类型',
action_type string comment '统计类型',
amount string comment '总数',
update_time string comment '更新时间',
dept_code string,
sync_time string
)
COMMENT '医院信息'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001';
要么指定分片
sqoop import \
--query "select * from $i where \$CONDITIONS" \
--connect "jdbc:mysql://192.168.29.42:3306/dr_alpha?characterEncoding=utf8&useSSL=false" \
--username "zhyx" \
--password "zhyx" \
--delete-target-dir \
--target-dir "/user/zhyx" \
--num-mappers 1 \
--fields-terminated-by '\001' \
--null-string '\\N' \
--null-non-string '\\N' \
--mapreduce-job-name "job $i" \
--hive-database "dr_alpha" \
--hive-table "ods_$i" \
--hive-drop-import-delims \
--hive-import \
--hive-overwrite \
要么按id分片,id分片默认分四片,小文件过多
#! /bin/bash
for i in stat_doctor stat_hospital push_log log_gateway his_register_yygh
do
sqoop import \
-Dorg.apache.sqoop.splitter.allow_text_splitter=true \
--query "select * from $i where \$CONDITIONS" \
--connect "jdbc:mysql://192.168.29.42:3306/dr_alpha?characterEncoding=utf8&useSSL=false" \
--username "zhyx" \
--password "zhyx" \
--delete-target-dir \
--target-dir "/user/zhyx" \
--split-by "id" \
--fields-terminated-by '\001' \
--null-string '\\N' \
--null-non-string '\\N' \
--mapreduce-job-name "job $i" \
--hive-database "dr_alpha" \
--hive-table "ods_$i" \
--hive-drop-import-delims \
--hive-import \
--hive-overwrite \
done
一、命令参数
同步数据的方式
第一种直接同步表,但是字段会打乱
$sqoop import \
--connect jdbc:mysql://192.168..42:3306/hospital_data_center \
--username zhyx \
--password zhyx \
--table v_user_info \
--target-dir /origin_data/hospital_data_center/db/v_user_info/2022-01-08 \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by '\t' \
--null-string '\\N' \
--null-non-string '\\N'
第一种查下字段排列
sqoop import \
--connect jdbc:mysql://192.168..42:3306/hospital_data_center \
--username zhyx \
--password zhyx \
--target-dir /origin_data/hospital_data_center/db/v_user_info/2022-01-08 \
--delete-target-dir \
--query "select id,hospital_code,primary_id,patient_id,admission_number,card_no,out_patient_no,patient_name,patient_sex,birthday,id_card_no,phone,age,height,weight,patient_type,in_state_code,in_state,create_time,update_time from v_user_info where \$CONDITIONS" \
--num-mappers 1 \
--fields-terminated-by '\t' \
--null-string '\\N' \
--null-non-string '\\N'
第三种方式更推荐,不用再额外建表导数据
sqoop import \
--connect jdbc:mysql://192.168.29.42:3306/hospital_data_center \
--username zhyx \
--password zhyx \
--query "select id,hospital_code,primary_id,patient_id,admission_number,card_no,out_patient_no,patient_name,patient_sex,birthday,id_card_no,phone,age,height,weight,patient_type,in_state_code,in_state,create_time,update_time from v_user_info where \$CONDITIONS" \
--target-dir /origin_data/hospital_data_center/db/v_user_info/2022-01-10 \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by '\t' \
--hive-import \
--hive-database hospital_data_center \
--hive-table V_USER_INFO \
--null-string '\\N' \
--null-non-string '\\N'