爬取代理IP
目的
- 有时候需要使用代理IP,可以自己写一个爬取免费代理IP的脚本,在需要的时候直接导入(import)即可
代码
import requests
from lxml import etree
# Shared module-level accumulator: each scraper function in this module
# appends its "ip:port" strings here and returns this same list object.
ips = []
"""国内免费代理"""
def h_proxies():
url = 'https://www.kuaidaili.com/free/inha/1/'
response = requests.get(url=url).content.decode()
html = etree.HTML(response)
proxy_info = html.xpath('//div/table/tbody/tr')
for info in proxy_info:
ip = info.xpath('./td[@data-title="IP"]/text()')
port = info.xpath('./td[@data-title="PORT"]/text()')
proxy_ip = ip[0] + ':' + port[0]
ips.append(proxy_ip)
return ips
"""国外免费代理"""
def f_proxies():
url = 'https://ip.jiangxianli.com/?page=2'
response = requests.get(url=url).content.decode()
html = etree.HTML(response)
proxy_info = html.xpath('//div/table/tbody/tr')
for info in proxy_info:
ip = info.xpath('./td[1]/text()')
port = info.xpath('./td[2]/text()')
proxy_ip = ip[0] + ':' + port[0]
ips.append(proxy_ip)
return ips
"""快代理"""
def k_proxies():
url = 'https://www.kuaidaili.com/free/'
response = requests.get(url=url).content.decode()
html = etree.HTML(response)
proxy_info = html.xpath('//div/table/tbody/tr')
for info in proxy_info:
ip = info.xpath('./td[1]/text()')
port = info.xpath('./td[2]/text()')
proxy_ip = ip[0] + ':' + port[0]
ips.append(proxy_ip)
return ips