Escaping Chinese characters in a URL
import urllib.parse
import string

url = "http://www.baidu.com/s?wd=百度"
new_url = urllib.parse.quote(url, safe=string.printable)  # percent-encode the Chinese characters, leaving printable ASCII untouched
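For reference, a minimal self-contained sketch of what the escape produces and why it is needed (assuming the same url as above; without escaping, urlopen raises a UnicodeEncodeError on the non-ASCII characters):

import string
import urllib.parse
import urllib.request

url = "http://www.baidu.com/s?wd=百度"
new_url = urllib.parse.quote(url, safe=string.printable)
print(new_url)  # http://www.baidu.com/s?wd=%E7%99%BE%E5%BA%A6
response = urllib.request.urlopen(new_url)  # the escaped URL is plain ASCII, so urlopen accepts it
print(response.getcode())  # 200 on success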
Building a URL from a dict (passing parameters as a dict)
import urllib.parse
import string

url = "http://www.baidu.com/s?"
dic = {"wd": "百度", "key": "zhang"}
_url = urllib.parse.urlencode(dic)  # turn the dict into "key=value" pairs joined by "&", percent-encoding each value
new_url = url + _url
end_url = urllib.parse.quote(new_url, safe=string.printable)  # escape any remaining Chinese characters
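Note that urlencode already percent-encodes the non-ASCII values, so the final quote call above acts mostly as a safeguard; a shorter sketch that relies on urlencode alone:

import urllib.parse

params = {"wd": "百度", "key": "zhang"}
url = "http://www.baidu.com/s?" + urllib.parse.urlencode(params)
print(url)  # http://www.baidu.com/s?wd=%E7%99%BE%E5%BA%A6&key=zhang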
Passing in a User-Agent dynamically
import urllib.request
import random


def load_baidu():
    url = "http://www.baidu.com"
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50",
    ]
    # pick a different browser identity for each request
    random_user_agent = random.choice(user_agent_list)

    request = urllib.request.Request(url)
    # attach the corresponding request header (User-Agent)
    request.add_header("User-Agent", random_user_agent)

    # fetch the data
    response = urllib.request.urlopen(request)
    # print the header that was actually sent
    # (add_header capitalizes the key, so it is stored as "User-agent")
    print(request.get_header("User-agent"))


load_baidu()
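A variant sketch that sets the header through the Request constructor instead of add_header (the pool here is a shortened copy of the list above; any pool of User-Agent strings works):

import random
import urllib.request

user_agents = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50",
]

request = urllib.request.Request(
    "http://www.baidu.com",
    headers={"User-Agent": random.choice(user_agents)},  # headers supplied at construction time
)
response = urllib.request.urlopen(request)
print(response.getcode())  # 200 on success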
Requesting data from different IPs (proxies)
import urllib.request


def proxy_user():
    # a pool of proxy IPs to cycle through
    proxy_list = [
        {"https": "106.75.226.36:808"},
        {"https": "61.135.217.7:80"},
        {"https": "125.70.13.77:8080"},
        {"https": "118.190.95.35:9001"},
    ]
    for proxy in proxy_list:
        print(proxy)
        # build a handler from the current proxy
        proxy_handler = urllib.request.ProxyHandler(proxy)
        # build an opener around the handler
        opener = urllib.request.build_opener(proxy_handler)

        try:
            # use the opener's own open() instead of urlopen()
            data = opener.open("http://www.baidu.com", timeout=1)
            haha = data.read()
            print(haha)
        except Exception as e:
            print(e)


proxy_user()
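When every later request should go through a single proxy, the opener can also be installed globally so that a plain urlopen() routes through it; a minimal sketch (the proxy address is a placeholder taken from the list above and may no longer be live):

import urllib.request

proxy_handler = urllib.request.ProxyHandler({"https": "106.75.226.36:808"})  # placeholder proxy
opener = urllib.request.build_opener(proxy_handler)
urllib.request.install_opener(opener)  # from here on, urlopen() uses this opener

try:
    response = urllib.request.urlopen("http://www.baidu.com", timeout=1)
    print(response.getcode())
except Exception as e:
    print(e)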