import urllib.request
import urllib.parse
import string


def get_params():
    url = "http://www.baidu.com/s?"

    params = {
        "wd": "中文",
        "key": "zhang",
        "value": "san"
    }
    str_params = urllib.parse.urlencode(params)
    print(str_params)
    final_url = url + str_params

    # Translate a URL containing Chinese characters into one the machine can recognise
    end_url = urllib.parse.quote(final_url, safe=string.printable)

    response = urllib.request.urlopen(end_url)

    data = response.read().decode("utf-8")
    print(data)


get_params()
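
# A minimal sketch: urlencode() above already percent-encodes the Chinese value,
# so the extra quote() call mainly matters when Chinese text is concatenated
# into the URL by hand, as below.
import string
import urllib.parse

raw_url = "http://www.baidu.com/s?wd=中文"  # hand-built URL containing raw Chinese
encoded_url = urllib.parse.quote(raw_url, safe=string.printable)
print(encoded_url)  # -> http://www.baidu.com/s?wd=%E4%B8%AD%E6%96%87
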
import urllib.request


def load_baidu():
    url = "http://www.baidu.com"
    # Add the request-header information (a sketch follows this snippet)

    # Create the request object
    request = urllib.request.Request(url)
    # Request the network data
    response = urllib.request.urlopen(request)
    print(response)
    data = response.read().decode("utf-8")

    # Response headers
    # print(response.headers)
    # Get the request-header information
    request_headers = request.headers
    print(request_headers)
    with open("02header.html", "w") as f:
        f.write(data)


load_baidu()
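
# A minimal sketch of the "add request headers" step mentioned above:
# headers can also be passed when the Request object is built
# (the User-Agent value here is just a placeholder).
import urllib.request

headers = {"User-Agent": "Mozilla/5.0"}
request = urllib.request.Request("http://www.baidu.com", headers=headers)
response = urllib.request.urlopen(request)
print(request.headers)  # the headers attached to the outgoing request
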
import urllib.request
import random


def load_baidu():
    url = "http://www.baidu.com"
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50"
    ]
    # Use a different browser identity for every request
    random_user_agent = random.choice(user_agent_list)

    request = urllib.request.Request(url)

    # Add the corresponding request header (User-Agent)
    request.add_header("User-Agent", random_user_agent)

    # Request the data
    response = urllib.request.urlopen(request)
    # Print the request-header information
    print(request.get_header("User-agent"))


load_baidu()
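
# Why get_header("User-agent") and not "User-Agent"? In CPython's urllib.request,
# add_header() stores the key as key.capitalize(), so only the first letter stays
# upper-case. A small sketch of that behaviour (the UA value is a placeholder):
import urllib.request

req = urllib.request.Request("http://www.baidu.com")
req.add_header("User-Agent", "Mozilla/5.0")
print(req.get_header("User-agent"))  # "Mozilla/5.0"
print(req.get_header("User-Agent"))  # None - the key spelling must match
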
import urllib.request


def handler_openner():
    # The built-in urlopen() does not support adding a proxy, so we build that ourselves
    # SSL (Secure Sockets Layer) relies on third-party CA digital certificates
    # HTTP uses port 80, HTTPS uses port 443
    # Why can urlopen() request data? Because of its handlers
    # Request the data with our own opener

    # urllib.request.urlopen()
    url = "https://blog.csdn.net/m0_37499059/article/details/79003731"

    # Create our own handler
    handler = urllib.request.HTTPHandler()
    # Create our own opener
    opener = urllib.request.build_opener(handler)
    # Use the opener's open() method to request the data
    response = opener.open(url)
    # data = response.read()
    data = response.read().decode("utf-8")

    with open("02header.html", "w") as f:
        f.write(data)


handler_openner()
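
# A minimal sketch: install_opener() registers the custom opener globally, so a
# plain urlopen() call goes through it as well. Note that build_opener() also
# adds the default handlers (including HTTPSHandler when ssl support is
# available), which is why the https URL above works with our own opener.
import urllib.request

opener = urllib.request.build_opener(urllib.request.HTTPHandler())
urllib.request.install_opener(opener)
response = urllib.request.urlopen("http://www.baidu.com")
print(response.getcode())  # 200 if the request succeeded
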
import urllib.request


def create_proxy_handler():
    url = "https://blog.csdn.net/m0_37499059/article/details/79003731"

    # Add a proxy
    proxy = {
        # Free proxy: a plain scheme-to-address mapping
        "http": ""
        # "http": "120.77.249.46:8080"
        # Paid proxy (a sketch of the authenticated form follows this snippet)
        # "http":"xiaoming":123@115.
    }
    # Proxy handler
    proxy_handler = urllib.request.ProxyHandler(proxy)

    # Create our own opener
    opener = urllib.request.build_opener(proxy_handler)
    # Send the request through the proxy IP
    response = opener.open(url)
    data = response.read().decode("utf-8")

    with open("03header.html", "w") as f:
        f.write(data)


create_proxy_handler()
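
# A hedged sketch of the paid-proxy form hinted at above: the credentials are
# embedded as user:password@host:port (all values below are placeholders).
import urllib.request

auth_proxy = {"http": "http://xiaoming:123456@127.0.0.1:8080"}
auth_proxy_handler = urllib.request.ProxyHandler(auth_proxy)
opener = urllib.request.build_opener(auth_proxy_handler)
# response = opener.open("http://www.baidu.com")
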
import urllib.request


def proxy_user():
    proxy_list = [
        {"https": ""},
        # {"https": "106.75.226.36:808"},
        # {"https": "61.135.217.7:80"},
        # {"https": "125.70.13.77:8080"},
        # {"https": "118.190.95.35:9001"}
    ]
    for proxy in proxy_list:
        print(proxy)
        # Build a handler from each proxy taken from the list
        proxy_handler = urllib.request.ProxyHandler(proxy)
        # Create the opener
        opener = urllib.request.build_opener(proxy_handler)
        try:
            response = opener.open("http://www.baidu.com", timeout=1)
            data = response.read()
            print(data)
        except Exception as e:
            print(e)


proxy_user()
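
# A small sketch of narrower error handling for the same loop: URLError covers
# refused connections and DNS failures, socket.timeout covers the timeout=1
# case (the helper name check_proxy is hypothetical).
import socket
import urllib.error
import urllib.request


def check_proxy(proxy, url="http://www.baidu.com", timeout=1):
    opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxy))
    try:
        opener.open(url, timeout=timeout)
        return True
    except (urllib.error.URLError, socket.timeout) as e:
        print(proxy, e)
        return False
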
import urllib.request


def load_baidu():
    url = "https://www.baidu.com"
    header = {
        # Browser version
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    }

    # Create the request object
    request = urllib.request.Request(url)
    # Dynamically add the header information
    request.add_header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36")
    # Request the network data (headers cannot be added here because urlopen() provides no parameter for them)
    response = urllib.request.urlopen(request)
    print(response)
    data = response.read().decode("utf-8")

    # Get the full URL
    final_url = request.get_full_url()
    print(final_url)

    # Response headers
    # print(response.headers)
    # (1) Get the request-header information (all of the headers)
    # request_headers = request.headers
    # print(request_headers)
    # (2) A second way to print the header information
    # Note: only the first letter is upper-case, the rest are lower-case
    request_headers = request.get_header("User-agent")
    # print(request_headers)
    with open("02header.html", "w") as f:
        f.write(data)


load_baidu()
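
# A minimal sketch: the `header` dict defined above is never passed to Request();
# the equivalent one-step form attaches it at construction time
# (the shortened User-Agent value here is a placeholder).
import urllib.request

header = {"User-Agent": "Mozilla/5.0"}
request = urllib.request.Request("https://www.baidu.com", headers=header)
print(request.get_header("User-agent"))
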