python 根据公司名称 查询公司地址

需求:

根据公司名称 查询公司地址

有3000个公司名称的xlsx 

计划方案:

方案1:用高德地图的API 获取,个人开发着免费5000条,成本为0

方案2:用企查查 这类API,官网报价0.15元 /次 查3000条 成本:500元

预期收益:

朋友介绍的需求:

88元红包

 

方案选择:

综上,选择方案1

 

实现:

 

高德地图API:https://lbs.amap.com/api/webservice/guide/api-advanced/search

高德地图API key的获得:https://lbs.amap.com/api/webservice/create-project-and-key

 

页面可以测试:

image

 

query_address.py

import requests


def get_location(keywords: str):
    """
    使用 高德地图 API 根据地址查询位置信息。
    """
    url = "https://restapi.amap.com/v3/place/text?parameters"

    params = {
        "key": "填入你的key",   #####
        "keywords": keywords,
        "types": "公司企业",
        "children": 1,
        "offset": 10,
        "extensions":"base"
    }


    res_data = requests.get(url, params=params, timeout=10)
    # 转换为<class 'dict'>
    res_dict = res_data.json()
    # print(res_dict)
    base_data = res_dict["pois"][0]
    # 拿到基础数据
    pname = base_data.get("pname")
    cityname = base_data["cityname"]
    address = base_data["address"]


    return pname,cityname,address


# 示例使用
if __name__ == "__main__":
    keywords = "安徽安科"  # 替换成你想查询的名称
    pname,cityname, address = get_location(keywords)
    print(pname,cityname,address)

image

main.py 

import pandas as pd
from query_address import get_location
import time


def read_xlsx():
    df = pd.read_excel('company.xlsx')
    return df


def main():
    df = read_xlsx()
    first_column = df.iloc[:, 0]

    # 确保有足够的列并设置列名
    if df.shape[1] < 4:
        # 添加缺失的列
        for col_idx in range(df.shape[1], 4):
            if col_idx == 1:
                df['pname'] = None
            elif col_idx == 2:
                df['cityname'] = None
            elif col_idx == 3:
                df['address'] = None
            else:
                df[f'col_{col_idx}'] = None

    # 统计信息
    valid_rows = first_column.notna().sum()
    total_rows = len(first_column)
    print(f"总行数: {total_rows}, 需要处理: {valid_rows} 行")

    processed = 0
    success = 0
    failed = 0

    for i, value in enumerate(first_column):
        if pd.notna(value):
            processed += 1
            print(f"\n[{processed}/{valid_rows}] 第{i + 1}行: {value}")

            try:
                pname, cityname, address = get_location(value)
                print(f"✓ 成功: {pname} | {cityname} | {address}")

                # 存储结果
                df.iloc[i, 1] = pname
                df.iloc[i, 2] = cityname
                df.iloc[i, 3] = address

                success += 1

                # 定期保存进度
                if processed % 10 == 0:  # 每10条保存一次
                    backup_file = f'test_progress_{processed}.xlsx'
                    df.to_excel(backup_file, index=False)
                    print(f"🔄 进度已保存到 {backup_file}")

                # API调用间隔,避免频率过高
                time.sleep(0.1)  # 根据API限制调整

            except Exception as e:
                print(f"✗ 失败: {e}")
                df.iloc[i, 1] = "查询失败"
                df.iloc[i, 2] = ""
                df.iloc[i, 3] = ""
                failed += 1

    # 最终保存
    final_file = 'test_final_result.xlsx'
    df.to_excel(final_file, index=False)

    print(f"\n=== 处理完成 ===")
    print(f"总计处理: {processed} 行")
    print(f"成功: {success} 行")
    print(f"失败: {failed} 行")
    print(f"结果已保存到: {final_file}")

    return df


if __name__ == '__main__':
    result_df = main()

    # 显示处理结果摘要
    print("\n=== 结果预览 ===")
    print(result_df.head(10))

    # 统计成功率
    success_count = (result_df.iloc[:, 1] != "查询失败").sum()
    total_processed = result_df.iloc[:, 1].notna().sum()
    if total_processed > 0:
        success_rate = (success_count / total_processed) * 100
        print(f"\n成功率: {success_rate:.1f}% ({success_count}/{total_processed})")

 

 

 

运行main:

image

总体目录:

image

 

结果:

image

 

感谢AI:

claude sonnet 4 think 

posted @ 2025-08-13 14:49  77gg  阅读(42)  评论(0)    收藏  举报