手机号段抓取脚本

手机号段抓取脚本

#! -*- coding:utf-8 -*-
import requests
from multiprocessing.pool import ThreadPool
from lxml import etree

# Silence the InsecureRequestWarning that is otherwise emitted for requests
# made with certificate verification disabled (this script uses verify=False).
from requests.packages.urllib3.exceptions import InsecureRequestWarning 
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


# Raw request headers in "Name: value" form, one per line, as copied from a
# browser request.  Parsed into the ``headers`` dict below.
# NOTE(review): "Accept-Encoding" advertises ``br``; presumably the HTTP stack
# can only decode brotli responses when a brotli package is installed --
# confirm, or drop ``br`` from the list.
headers_str = """Host: www.sdfl.net
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8
Accept-Language: zh-TW,zh;q=0.8,en-US;q=0.5,en;q=0.3
Accept-Encoding: gzip, deflate, br
Referer: https://cn.bing.com/
DNT: 1
Connection: keep-alive
Upgrade-Insecure-Requests: 1
Sec-Fetch-Dest: document
Sec-Fetch-Mode: navigate
Sec-Fetch-Site: cross-site
Sec-Fetch-User: ?1
Pragma: no-cache
Cache-Control: no-cache"""


# Header name -> value mapping passed to requests.
# Each line is split on the first ": " only, so values that themselves contain
# ": " stay intact.  Blank lines (a common artifact when pasting a header
# dump) are skipped instead of crashing the dict construction.
headers = {
    name: value
    for name, value in (
        line.split(": ", 1)
        for line in headers_str.split("\n")
        if line.strip()
    )
}

# Optional local SOCKS5 proxy; enable it by passing ``proxies=proxies`` to
# requests.  requests looks the proxy up by the URL scheme, and the page
# fetched by this script is served over https, so both schemes are mapped to
# the same proxy (an "http"-only mapping would leave https traffic unproxied).
proxies = {
    "http": "socks5://127.0.0.1:8080",
    "https": "socks5://127.0.0.1:8080",
}

# Listing page whose phone-number prefixes are scraped.
url = "https://www.sdfl.net/s/hubei/wuhan/"

# verify=False disables TLS certificate verification (kept from the original
# script).  Add ``proxies=proxies`` here to route the request through the proxy.
# The timeout prevents the script from hanging forever on a stalled connection.
ret = requests.get(url=url, headers=headers, verify=False, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of reporting success and then
# parsing an error page.
ret.raise_for_status()

print("[*] 获取html成功")

html_data = etree.HTML(ret.text)

# One <dl class="list"> element per search result inside the result wrapper.
seach_list = html_data.xpath(r'//div[@class="wrap h_list"]/dl[@class="list"]')

print("[*] 获取手机号段数量:", len(seach_list))

# Text of every ./dd/a link under each result; each text is used as the
# leading part (prefix) of the generated numbers.
prefixes = []
for entry in seach_list:
    prefixes.extend(entry.xpath(r'./dd/a/text()'))

# Every prefix is expanded with all 4-digit suffixes 0000-9999.
SUFFIXES_PER_PREFIX = 10000

print("[*] 获取手机号总数量:", len(prefixes) * SUFFIXES_PER_PREFIX)

# Append mode is kept from the original: repeated runs add to phone.txt.
# Numbers are generated lazily per prefix rather than collected in one huge
# list first, and each batch is handed to writelines() as an iterable of
# lines (passing a single str would be written character by character).
with open(r'phone.txt', "a", encoding="utf-8") as f:
    for prefix in prefixes:
        f.writelines(
            "{}{:0>4d}\n".format(prefix, suffix)
            for suffix in range(SUFFIXES_PER_PREFIX)
        )
posted @ 2022-11-20 14:36  是谁走漏了消息  阅读(122)  评论(0)    收藏  举报