python 根据公司名称 查询公司地址
需求:
根据公司名称 查询公司地址
有3000个公司名称的xlsx
计划方案:
方案1:用高德地图的API 获取,个人开发着免费5000条,成本为0
方案2:用企查查 这类API,官网报价0.15元 /次 查3000条 成本:500元
预期收益:
朋友介绍的需求:
88元红包
方案选择:
综上,选择方案1
实现:
高德地图API:https://lbs.amap.com/api/webservice/guide/api-advanced/search
高德地图API key的获得:https://lbs.amap.com/api/webservice/create-project-and-key
页面可以测试:

query_address.py
import requests
def get_location(keywords: str):
"""
使用 高德地图 API 根据地址查询位置信息。
"""
url = "https://restapi.amap.com/v3/place/text?parameters"
params = {
"key": "填入你的key", #####
"keywords": keywords,
"types": "公司企业",
"children": 1,
"offset": 10,
"extensions":"base"
}
res_data = requests.get(url, params=params, timeout=10)
# 转换为<class 'dict'>
res_dict = res_data.json()
# print(res_dict)
base_data = res_dict["pois"][0]
# 拿到基础数据
pname = base_data.get("pname")
cityname = base_data["cityname"]
address = base_data["address"]
return pname,cityname,address
# 示例使用
if __name__ == "__main__":
keywords = "安徽安科" # 替换成你想查询的名称
pname,cityname, address = get_location(keywords)
print(pname,cityname,address)
main.py
import pandas as pd
from query_address import get_location
import time
def read_xlsx():
df = pd.read_excel('company.xlsx')
return df
def main():
df = read_xlsx()
first_column = df.iloc[:, 0]
# 确保有足够的列并设置列名
if df.shape[1] < 4:
# 添加缺失的列
for col_idx in range(df.shape[1], 4):
if col_idx == 1:
df['pname'] = None
elif col_idx == 2:
df['cityname'] = None
elif col_idx == 3:
df['address'] = None
else:
df[f'col_{col_idx}'] = None
# 统计信息
valid_rows = first_column.notna().sum()
total_rows = len(first_column)
print(f"总行数: {total_rows}, 需要处理: {valid_rows} 行")
processed = 0
success = 0
failed = 0
for i, value in enumerate(first_column):
if pd.notna(value):
processed += 1
print(f"\n[{processed}/{valid_rows}] 第{i + 1}行: {value}")
try:
pname, cityname, address = get_location(value)
print(f"✓ 成功: {pname} | {cityname} | {address}")
# 存储结果
df.iloc[i, 1] = pname
df.iloc[i, 2] = cityname
df.iloc[i, 3] = address
success += 1
# 定期保存进度
if processed % 10 == 0: # 每10条保存一次
backup_file = f'test_progress_{processed}.xlsx'
df.to_excel(backup_file, index=False)
print(f"🔄 进度已保存到 {backup_file}")
# API调用间隔,避免频率过高
time.sleep(0.1) # 根据API限制调整
except Exception as e:
print(f"✗ 失败: {e}")
df.iloc[i, 1] = "查询失败"
df.iloc[i, 2] = ""
df.iloc[i, 3] = ""
failed += 1
# 最终保存
final_file = 'test_final_result.xlsx'
df.to_excel(final_file, index=False)
print(f"\n=== 处理完成 ===")
print(f"总计处理: {processed} 行")
print(f"成功: {success} 行")
print(f"失败: {failed} 行")
print(f"结果已保存到: {final_file}")
return df
if __name__ == '__main__':
result_df = main()
# 显示处理结果摘要
print("\n=== 结果预览 ===")
print(result_df.head(10))
# 统计成功率
success_count = (result_df.iloc[:, 1] != "查询失败").sum()
total_processed = result_df.iloc[:, 1].notna().sum()
if total_processed > 0:
success_rate = (success_count / total_processed) * 100
print(f"\n成功率: {success_rate:.1f}% ({success_count}/{total_processed})")
运行main:

总体目录:

结果:

感谢AI:
claude sonnet 4 think


浙公网安备 33010602011771号