批量采集培训机构数据

最近在做一个培训机构(学校)查询网站,发现有一个提供这方面数据的接口,所以用 PHP 写了下面这个脚本来进行查询。
在php环境新建个peixun.php文件,
代码如下:

<?php
// Endpoint that returns the paginated institution list.
$url = 'https://xwpx.eduyun.cn/tolSpInfo/getSpInfoList';

// Every response of this script is JSON, so declare it as such.
if (!headers_sent()) {
    header('Content-Type: application/json; charset=utf-8');
}

// Page number forwarded upstream as `pageNo`. Anything that is not a positive
// integer (missing, non-numeric, array-valued, < 1) falls back to page 1.
$rawPage = $_GET['page'] ?? 1;
$page = is_scalar($rawPage)
    ? filter_var($rawPage, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]])
    : false;
if ($page === false) {
    $page = 1;
}

// Province-level area codes. The `area` query parameter is a 0-based index
// into this list (0 => 110000 ... 31 => 660000).
$numbers = [
    110000, 120000, 130000, 140000, 150000,
    210000, 220000, 230000, 310000, 320000,
    330000, 340000, 350000, 360000, 370000,
    410000, 420000, 430000, 440000, 450000,
    460000, 500000, 510000, 520000, 530000,
    540000, 610000, 620000, 630000, 640000,
    650000, 660000,
];

// Resolve the province code. Only string/int keys are looked up: an
// array-valued `area` (e.g. ?area[]=1) would otherwise raise a TypeError
// inside isset() on PHP 8.
$area = $_GET['area'] ?? '';
$areaid = ((is_string($area) || is_int($area)) && isset($numbers[$area]))
    ? $numbers[$area]
    : '';
if ($areaid === '') {
    echo json_encode(['error' => '无效的地区参数'], JSON_UNESCAPED_UNICODE);
    exit();
}
 
/**
 * Fetch the codes of the areas directly below a given area.
 *
 * Sends a form-encoded POST to the `getChildArea` endpoint and collects the
 * `areaCode` of every entry in the response's `data` list. Transport errors
 * and JSON parse errors are written to the error log; those cases, a
 * `retCode` other than '000000', and an unexpected payload shape all yield
 * an empty array.
 *
 * @param int|string $areaid Parent area code (sent as `areaCode`).
 * @return array List of child area codes; empty when none were obtained.
 */
function getareaid($areaid): array {
    $endpoint = 'https://xwpx.eduyun.cn/xspxRegister/getChildArea';

    $payload = http_build_query([
        'PAGE_SERIAL_VERSION_UID' => '',
        'areaCode' => $areaid,
    ]);

    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL => $endpoint,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_RETURNTRANSFER => true,
        // Bound the request so a stalled upstream cannot hang the script.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_HTTPHEADER => [
            'Referer: https://xwpx.eduyun.cn/tol/toHomePageParentServices',
            'User-Agent: mozilla/5.0 (macintosh; intel mac os x 10_15_1) applewebkit/537.36 (khtml, like gecko) brave chrome/78.0.3904.70 safari/537.36 Edg/139.0.0.0',
            'Content-Type: application/x-www-form-urlencoded'
        ],
        // NOTE(review): hard-coded cookie values, presumably captured from a
        // browser session; they may need refreshing if the site rejects them.
        CURLOPT_COOKIE => 'HWWAFSESID=d82bf97755bdb320d6; HWWAFSESTIME=1755569214521; SESSION=552af9b3-ac7a-4bba-b35a-27978f142f24; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22198c01467119ab-0916c16c16c16c-4c657b58-2073600-198c0146712113d%22%2C%22first_id%22%3A%22%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTk4YzAxNDY3MTE5YWItMDkxNmMxNmMxNmMxNmMtNGM2NTdiNTgtMjA3MzYwMC0xOThjMDE0NjcxMjExM2QifQ%3D%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%22%2C%22value%22%3A%22%22%7D%2C%22%24device_id%22%3A%22198c01467119ab-0916c16c16c16c-4c657b58-2073600-198c0146712113d%22%7D; Hm_lvt_c3f009f814f701e8fad8a17f9682ec79=1755596687; HMACCOUNT=497681945067787A; Hm_lpvt_c3f009f814f701e8fad8a17f9682ec79=1755596697',
        // SECURITY: TLS certificate verification is disabled, which allows
        // man-in-the-middle tampering. Left as in the original; enable both
        // options once a CA bundle is configured for this PHP installation.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false
    ]);

    $response = curl_exec($ch);

    // Transport-level failure: log it and report "no child areas".
    if ($response === false || curl_errno($ch)) {
        error_log('cURL错误: ' . curl_error($ch));
        curl_close($ch);
        return [];
    }
    curl_close($ch);

    $result = json_decode($response, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        error_log('JSON解析错误: ' . json_last_error_msg());
        return [];
    }

    // Valid JSON can still be null, a scalar, or lack the expected keys;
    // check the shape before indexing so no warnings are raised.
    if (!is_array($result)
        || ($result['retCode'] ?? null) !== '000000'
        || !is_array($result['data'] ?? null)
    ) {
        return [];
    }

    $areaCodes = [];
    foreach ($result['data'] as $item) {
        if (is_array($item) && !empty($item['areaCode'])) {
            $areaCodes[] = $item['areaCode'];
        }
    }

    return $areaCodes;
}
 
 
// Look up the child areas of the selected province; without them there is
// no city code to query, so stop with an error.
$childAreaCodes = getareaid($areaid);
if (empty($childAreaCodes)) {
    echo json_encode(['error' => '未获取到子地区编码'], JSON_UNESCAPED_UNICODE);
    exit();
}

// `object` filter. The value 5 is expanded to the list "0,1,2,3,4".
// Non-scalar input (e.g. ?object[]=1) is treated as not supplied.
$object = $_GET['object'] ?? '';
if (!is_scalar($object)) {
    $object = '';
} elseif ((string) $object === '5') {
    $object = "0,1,2,3,4";
}

// `type` query parameter, forwarded upstream as `profitType`. The value was
// read here but never sent (the request always carried '1'); it is now
// honoured when supplied, with '1' kept as the default.
$profitType = $_GET['type'] ?? '';
if (!is_string($profitType) || $profitType === '') {
    $profitType = '1';
}

// Index of the child area to query on this call; defaults to 0 and is reset
// to 0 whenever it falls outside the list.
$totalCities = count($childAreaCodes);
$currentIndex = isset($_GET['index']) ? intval($_GET['index']) : 0;
if ($currentIndex < 0 || $currentIndex >= $totalCities) {
    $currentIndex = 0;
}

// City code for this call, and the index the caller should request next
// (wraps around to 0 after the last child area).
$currentCityCode = $childAreaCodes[$currentIndex];
$nextIndex = ($currentIndex + 1) % $totalCities;

// Form fields for the list request; only the current city is queried.
$postData = [
    'PAGE_SERIAL_VERSION_UID' => '',
    'province' => $areaid,
    'city' => $currentCityCode,
    'area' => '',
    'object' => $object,
    'profitType' => $profitType,
    'businessType' => '1',
    'pageNo' => $page,
    'pageSize' => 10
];
 
// Serialize the request fields as application/x-www-form-urlencoded.
$postDataString = http_build_query($postData);

$ch = curl_init();
curl_setopt_array($ch, [
    CURLOPT_URL => $url,
    CURLOPT_POST => true,
    CURLOPT_POSTFIELDS => $postDataString,
    CURLOPT_RETURNTRANSFER => true,
    // Bound the request so a stalled upstream cannot hang the script.
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTPHEADER => [
        'Content-Type: application/x-www-form-urlencoded',
        'Referer: https://xwpx.eduyun.cn/tolSpInfo/index',
        'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
    ],
    // NOTE(review): hard-coded cookie values, presumably captured from a
    // browser session; they may need refreshing if the site rejects them.
    CURLOPT_COOKIE => 'HWWAFSESID=d82bf97755bdb320d6; HWWAFSESTIME=1755569214521; SESSION=552af9b3-ac7a-4bba-b35a-27978f142f24; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22198c01467119ab-0916c16c16c16c-4c657b58-2073600-198c0146712113d%22%2C%22first_id%22%3A%22%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fcn.bing.com%2F%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTk4YzAxNDY3MTE5YWItMDkxNmMxNmMxNmMxNmMtNGM2NTdiNTgtMjA3MzYwMC0xOThjMDE0NjcxMjExM2QifQ%3D%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%22%2C%22value%22%3A%22%22%7D%2C%22%24device_id%22%3A%22198c01467119ab-0916c16c16c16c-4c657b58-2073600-198c0146712113d%22%7D',
    // SECURITY: TLS certificate verification is disabled, which allows
    // man-in-the-middle tampering. Left as in the original; enable both
    // options once a CA bundle is configured for this PHP installation.
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_SSL_VERIFYHOST => false
]);

$response = curl_exec($ch);

if ($response === false || curl_errno($ch)) {
    // Transport-level failure: report it to the caller as JSON.
    $result = ['error' => '请求错误: ' . curl_error($ch)];
} else {
    // Only a decoded array can carry the paging fields added below. A scalar
    // or null payload is reported as unparseable instead of being indexed,
    // which would raise an Error.
    $responseData = json_decode($response, true);
    $result = is_array($responseData) ? $responseData : ['error' => '无法解析响应数据'];

    // Paging hints so the caller can walk through every child area in turn.
    $result['current_index'] = $currentIndex;
    $result['next_index'] = $nextIndex;
    $result['current_city_code'] = $currentCityCode;
    $result['total_cities'] = count($childAreaCodes);
}

curl_close($ch);

// Emit the result as pretty-printed JSON with unescaped Unicode.
echo json_encode($result, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
?>

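这样就可以了,调用方式为:peixun.php?page=[页码]&area=[地区序号]&object=[对象参数]&type=[类型参数]&index=[城市序号]
其中:
page(页码):1-49
area(地区序号):0-31,对应代码中 $numbers 数组的下标(0 为 110000,31 为 660000)
object(对象参数):1-5,传 5 时会被转换为 "0,1,2,3,4"
type(类型参数):1-5
index(城市序号):0-30,即所选省份下子地区列表的下标,超出范围时按 0 处理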

效果:http://www.6983.net/company/5/

posted @ 2025-08-30 12:18  圆柱模板  阅读(22)  评论(0)    收藏  举报