数据清洗2
代码如下:
import json
import pymysql
import pandas as pd
import requests
from sqlalchemy import create_engine
# 连接MySQL数据库
conn = pymysql.connect(host='localhost', user='root', password='你的密码', db='你的数据库', charset='utf8')
# 编写SQL查询语句
sql = "SELECT * FROM table3 "
# 使用pandas读取数据
df = pd.read_sql(sql, conn)
new_addresses = []
# 更改循环变量名,避免与 DataFrame 对象名冲突
for i, address in enumerate(df['单位']):
if address:
url = 'https://restapi.amap.com/v3/geocode/geo?address=' + address + '&output=JOSON&key=你的key'
request = requests.get(url).text
try:
result = json.loads(request)
except json.JSONDecodeError as e:
new_address=address
if result['status'] == "1":
adcode = result['geocodes'][0]['adcode']
sql2 = "UPDATE table3 SET 行政区划编码 = '%s' where 单位 = '%s' " % (adcode, address)
# 执行数据库插入
cursor = conn.cursor()
cursor.execute(sql2)
conn.commit()
if result['geocodes'][0]['district']:
new_address = str(result['geocodes'][0]['province']) + str(result['geocodes'][0]['city']) + str(result['geocodes'][0]['district'])
else:
if result['geocodes'][0]['city']:
new_address = result['geocodes'][0]['province'] + result['geocodes'][0]['city']
else:
new_address = result['geocodes'][0]['province']
else:
new_address = address
else:
new_address = ""
address = ""
if new_address != address:
sql1 = "UPDATE table3 SET 单位 = '%s' WHERE 单位 = '%s'" % (new_address, address)
#向table3插入行政区划编码
# 使用cursor执行SQL语句
cursor = conn.cursor()
cursor.execute(sql1)
conn.commit()
cursor.close()
print(address + "---->" + new_address + " --" + str(i))
conn.close()
同理其他表也是这样操作,然后再合并。


浙公网安备 33010602011771号