urllib发送请求
import urllib.request
# Fetch the Baidu home page over plain HTTP and print its HTML.
url = "http://www.baidu.com"
response = urllib.request.urlopen(url)
# read() yields raw bytes; decode them as UTF-8 to obtain text.
raw_body = response.read()
content = raw_body.decode('utf-8')
print(content)
如果不加 decode 解码,read() 返回的是字节数据(bytes),而不是字符串。
# HTTP status code of the response.
code = response.getcode()
print(code)
# Response headers as a list of (name, value) tuples.
headers = response.getheaders()
print(headers)
下载:
import urllib.request

# Download the page at `url` and save it to the local file 'baidu1.html'.
url = 'http://www.baidu.com'
urllib.request.urlretrieve(url, 'baidu1.html')
下载图片:
import urllib.request

# Download the image at `url` and save it to the local file 'daimei.jpg'.
url = 'https://img1.baidu.com/it/u=1095880180,3931424613&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=889'
urllib.request.urlretrieve(url, 'daimei.jpg')
百度的 url 换成 https 后会触发反爬:像下面这样不带 User-Agent 请求头直接请求,返回的数据是不完整的。
# Request the Baidu home page over HTTPS without any custom headers.
url = 'https://www.baidu.com'
# The context manager closes the response even if reading or decoding fails.
with urllib.request.urlopen(url) as res:
    content = res.read().decode('utf-8')
print(content)
百度搜索的关键字含有中文时,需要先用 urllib.parse.quote 转成 ASCII 形式的百分号编码(URL 编码)才行
import urllib.request
import urllib.parse
# Search Baidu for a single keyword.
# quote() percent-encodes the non-ASCII keyword so it can be placed in the
# URL; '周杰伦' becomes %E5%91%A8%E6%9D%B0%E4%BC%A6.
key = urllib.parse.quote('周杰伦')
url = 'https://www.baidu.com/s?wd=' + key
# Send a browser User-Agent header with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62'
}
# A Request object is needed to attach custom headers; urlopen(url) alone
# cannot carry them.
request = urllib.request.Request(url=url, headers=headers)
# The context manager closes the response even if reading or decoding fails.
with urllib.request.urlopen(request) as response:
    content = response.read().decode('utf-8')
print(content)
百度搜索多个关键字使用urlencode:
import urllib.request
import urllib.parse
# Search Baidu with several query parameters at once.
# Send a browser User-Agent header with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62'
}
data = {
    'wd': '周杰伦',
    'sex': '男',
    'location': '台湾',
}
# urlencode() percent-encodes every key/value pair and joins them with '&',
# producing a ready-to-use query string.
reqData = urllib.parse.urlencode(data)
url = 'https://www.baidu.com/s?' + reqData
print(reqData)
request = urllib.request.Request(url=url, headers=headers)
# The context manager closes the response even if reading or decoding fails.
with urllib.request.urlopen(request) as response:
    content = response.read().decode('utf-8')
print(content)
浙公网安备 33010602011771号