3 DataX: transferring data between MySQL and Hive

                                            MySQL --> Hive
 
 
0 Reference documentation:
https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md
 
1 Configuration file: mysql2hive.json
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": ["id","username"],
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://192.168.43.20:3306/test"],
                                "table": ["target_user"]
                            }
                        ],
                        "password": "111111",
                        "username": "root",
                        "where": ""
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "column": [
                {"name":"id","type":"string"}
                {"name":"username","type":"string"}
                              ],
                        "compress": "gzip",
                        "defaultFS": "hdfs://192.168.43.20:8020",
                        "fieldDelimiter": ",",
                        "fileName": "target_user",
                        "fileType": "text",
                        "path": "/user/hive/warehouse/mysql2hive",
                        "writeMode": "append"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
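A few hdfswriter details worth noting (see the reference doc above): the target path must already exist before the job runs; fileName is only a prefix, since DataX appends a random suffix to each file it actually writes; and writeMode "append" writes new files without touching existing ones, while "nonConflict" fails if files with the fileName prefix are already present. The "gzip" compression is fine here because Hive reads gzip-compressed text files transparently.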
 
 
 
 
2 Create the MySQL source data:
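The original post doesn't show the source data, so here is a minimal sketch, assuming a schema that matches the reader's "column" list (the sample rows are purely illustrative):

-- assumed MySQL source table in the test database
create table test.target_user (
    id int primary key,
    username varchar(50)
);

insert into test.target_user (id, username) values
    (1, 'zhangsan'),
    (2, 'lisi');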
    
3 Create the corresponding Hive table
    
create table mysql2hive(
id int,
username string
)row format delimited
fields terminated by ","
lines terminated by "\n";
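Hive creates this table's directory under the default warehouse location, which is exactly the path the writer config points at. A quick sanity check before running the job:

hdfs dfs -ls /user/hive/warehouse/mysql2hive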
 
4 Run the job:
     python datax.py mysql2hive.json
5 Verify the data in Hive
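For example, from the Hive CLI (the rows shown depend on what was inserted in step 2):

select * from mysql2hive;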
 
                    Hive --> MySQL
 
1 Configuration file: hdfs2mysql.json
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "hdfsreader",
                    "parameter": {
                        "column": [
                {"index":0,"type":"long"}
                {"index":1,"type":"string"}
            ],
                        "defaultFS": "hdfs://192.168.43.20:8020",
                        "encoding": "UTF-8",
                        "fieldDelimiter": ",",
                        "fileType": "text",
                        "path": "/user/hive/warehouse/mysql2hive"
                    }
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "column": ["id","username"],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://192.168.43.20:3306/test",
                                "table": ["target_user"]
                            }
                        ],
                        "password": "111111",
                        "preSql": [],
                        "session": [],
                        "username": "root",
                        "writeMode": "insert"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
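One caveat: this job writes back into the same target_user table it originally read from, so with writeMode "insert" each run appends another copy of the rows (or fails on a duplicate-key error if id is a primary key). If re-runs should replace the data, one option, assuming deleting is acceptable in your setup, is mysqlwriter's preSql hook, which runs before the write begins:

                        "preSql": ["delete from target_user"],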
 
 
    
2 Check the Hive source data:
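For instance, from the shell:

hive -e "select * from mysql2hive;"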
3 Prepare the MySQL target table
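Here the target is the same target_user table created earlier, so nothing extra is needed; for a fresh target, the table just has to match the writer's "column" list, e.g. (illustrative):

create table if not exists test.target_user (
    id int,
    username varchar(50)
);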
4 Run the job: python datax.py hdfs2mysql.json
 
5 Verify the MySQL data:
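e.g. in the MySQL client:

select * from test.target_user;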
    
 
 
Note: if the MySQL column is numeric and the Hive column is string, the data imports into the Hive table without problems;
                    but in the other direction, if the Hive column is declared string and the corresponding MySQL column is int, the job errors out.
posted on 2019-07-09 09:51 by 一只猪儿虫