阿里DataWorks注册UDTF函数
1.背景
最近有个需求,需要解析 MongoDB 里面的 JSON 数据,采用的开发平台是 DataWorks。原始 JSON 内容如下:
{
"id": 0,// 方案ID
"premiseDetails": [
{
"premiseId": 0,// 楼盘ID
"price": 0, // 价格
"pointDetails": [
{
"code": "",// 点位编码
"network": 0, // 联网状态:
"unitId": 0, // 单元ID
"unitState": true, //单元状态,梯内没有该字段
"success": false, // 是否选中
"time": "", // 变更时间
"info": "" // 踢点原因
}
]
}
]
}
2.实施
(1)将数据同步到 MaxCompute(MC):我用 id 和 premise_details 两个字段来接收 MongoDB 的数据。
(2)编写 UDTF,把 premise_details 字段展开成多行,代码如下:

/**
 * UDTF that expands one {@code premise_details} string into one output row per
 * entry of each premise's {@code pointDetails} array.
 *
 * <p>Output columns, in order: premiseId, price, code, unitId, network,
 * success, time, info. A field that is absent or JSON null is emitted as an
 * empty string.
 *
 * <p>NOTE(review): the input is presumably the {@code toString()} form of a
 * list of MongoDB {@code Document} objects (e.g. {@code [Document{{a=1, b=2}}]});
 * confirm against the sync job. The text is rewritten into JSON with plain
 * global replacements, so values that themselves contain {@code =}, {@code ,},
 * {@code #}, {@code @} or spaces will be altered.
 */
@Resolve("string->string,string,string,string,string,string,string,string")
public class get_mongodb_json_udtf extends UDTF {

    /**
     * Parses one input row and forwards one output row per point detail.
     *
     * @param objects UDTF arguments; only {@code objects[0]} (a string) is read
     * @throws UDFException propagated from {@code forward}
     */
    @Override
    public void process(Object[] objects) throws UDFException {
        // A NULL column arrives as a null argument: emit nothing instead of failing with an NPE.
        if (objects == null || objects.length == 0 || objects[0] == null) {
            return;
        }

        String input = toJson((String) objects[0]);

        JsonParser parser = new JsonParser();
        JsonElement root = parser.parse(input);
        // Nothing to emit when the rewritten text parses to JSON null.
        if (root == null || root.isJsonNull()) {
            return;
        }

        // Parse the JSON array string and walk every premise in it.
        JsonArray premises = root.getAsJsonArray();
        for (JsonElement element : premises) {
            JsonObject obj = element.getAsJsonObject();
            String premiseId = stringOrEmpty(obj, "premiseId");
            String price = stringOrEmpty(obj, "price");

            // A premise without a pointDetails array has no rows to produce.
            if (!obj.has("pointDetails") || !obj.get("pointDetails").isJsonArray()) {
                continue;
            }
            JsonArray pointDetails = obj.get("pointDetails").getAsJsonArray();

            for (JsonElement point : pointDetails) {
                JsonObject pointObj = point.getAsJsonObject();
                forward(
                        premiseId,
                        price,
                        stringOrEmpty(pointObj, "code"),
                        stringOrEmpty(pointObj, "unitId"),
                        stringOrEmpty(pointObj, "network"),
                        stringOrEmpty(pointObj, "success"),
                        stringOrEmpty(pointObj, "time"),
                        stringOrEmpty(pointObj, "info"));
            }
        }
    }

    /**
     * Rewrites the raw column text into a JSON array string.
     *
     * <p>The replacement order matters: each step relies on the output of the
     * previous one.
     */
    private static String toJson(String raw) {
        return raw
                // key=value  ->  key":"value
                .replaceAll("=", "\":\"")
                // drop the "Document" type prefix
                .replaceAll("Document", "")
                // "{{" / "}}" become object braces plus the first/last quote
                .replaceAll("\\{\\{", "{\"")
                .replaceAll("\\}\\}", "\"}")
                // protect commas that separate objects with '#' / '@' ...
                .replaceAll("]\"},", "]}#")
                .replaceAll("\\},\\{", "}#{")
                .replaceAll("\\},", "}@")
                // ... so that only commas between fields get surrounding quotes
                .replaceAll(",", "\",\"")
                // restore the protected commas
                .replaceAll("#", ",")
                .replaceAll("@", ",")
                // the nested array must not be wrapped in quotes
                .replaceAll("pointDetails\":\"", "pointDetails\":")
                .replaceAll("}]\"}]", "}]}]")
                // strip every space
                .replaceAll(" ", "");
    }

    /**
     * Returns the string form of {@code key} in {@code obj}, or an empty string
     * when the key is absent or its value is JSON null.
     */
    private static String stringOrEmpty(JsonObject obj, String key) {
        if (obj.has(key) && !obj.get(key).isJsonNull()) {
            return obj.get(key).getAsString();
        }
        return "";
    }
}


更多技术知识关注公众号《码农独白》


浙公网安备 33010602011771号