# -*- coding:utf-8 -*-
import sys
import pandas as pd
from sqlalchemy import create_engine
import MySQLdb
import json
reload(sys)
sys.setdefaultencoding('utf-8')
def find(database, table_name):
# 创建连接
conn = MySQLdb.Connect(host='127.0.0.100'
, user='dataro'
, passwd='hadoop@123'
, db='ws_rc'
, port=3306
, charset='utf8'
)
# 创建查询入口
cur = conn.cursor(MySQLdb.cursors.DictCursor)
# 创建查询
sql = "select * from flows where LENGTH(remarks) > 2 ;"
# 执行sql
cur.execute(sql)
# 获取数据
processResult = cur.fetchall()
# 关闭 cur
cur.close()
# 关闭 conn
conn.close()
# 定义字典,获取要解析的内容
field_dict = {
"id": []
, "apply_id": []
, "current_step": []
, "current_status": []
, "operator_uid": []
, "accepted_uid": []
, "accepted_role": []
, "user": []
, "remark": []
, "remarks": []
, "time": []
, "note": []
, "refuse_type": []
, "created_at": []
, "updated_at": []
}
# 循环解析 提取字段内容
for i in processResult:
rmk = i["remarks"]
dict_rmk = json.loads(rmk, strict=False)
for k in range(len(dict_rmk)):
field_dict["id"].append(i["id"])
field_dict["remarks"].append(i["remarks"])
field_dict["current_step"].append(i["current_step"])
field_dict["current_status"].append(i["current_status"])
field_dict["operator_uid"].append(i["operator_uid"])
field_dict["accepted_uid"].append(i["accepted_uid"])
field_dict["accepted_role"].append(i["accepted_role"])
field_dict["note"].append(i["note"])
field_dict["refuse_type"].append(i["refuse_type"])
field_dict["created_at"].append(i["created_at"])
field_dict["updated_at"].append(i["updated_at"])
field_dict["apply_id"].append(i["apply_id"])
field_dict["user"].append(dict_rmk[k]["user"])
field_dict["remark"].append(dict_rmk[k]["remark"])
field_dict["time"].append(dict_rmk[k]["time"])
dt = pd.DataFrame(data=field_dict)
return dt
def to_mysql(dataframe):
connect = create_engine('mysql+mysqldb://root:hadoop@123@127.0.0.100:3306/data_outer?charset=utf8')
pd.io.sql.to_sql(dataframe
, "sync_ied_ws_rc_flows_remark"
, con=connect
, schema="data_outer"
, if_exists="append"
)
def delete_data():
db = MySQLdb.connect(host=127.0.0.100'
, user='root'
, passwd='ws@2017'
, db='x_engine_data_outer'
, port=3306
, charset='utf8')
cursor = db.cursor()
cursor.execute("delete from data_outer.sync_ied_ws_rc_flows_remark;")
cursor.close()
db.commit()
db.close()
if __name__ == '__main__':
print "start"
delete_data()
dataframe = find("ws_rc", "flows")
to_mysql(dataframe)
print "解析完成"
print "finish!!!"