爬取豆瓣电影，保存到 JSON 文件中
import urllib.request
# Fetch the first 20 entries (start=0, limit=20) of the Douban movie chart
# and save the decoded response body to movie.json.
url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&start=0&limit=20'
headers = {
    # Browser-like User-Agent; presumably required for the site to serve
    # the request -- TODO confirm.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62',
}
request = urllib.request.Request(url=url, headers=headers)

# Context managers close the HTTP response and the output file even if
# reading, decoding, or writing raises.
with urllib.request.urlopen(request) as res:
    content = res.read().decode('utf-8')

with open('movie.json', 'w', encoding='utf-8') as file:
    file.write(content)
获取豆瓣电影第 1-10 页的数据
import urllib.request
import urllib.parse
def getMovieTest(page):
    """Download one page of the Douban movie chart and save it to a file.

    Requests 20 entries starting at offset ``(page - 1) * 20`` from the
    ``top_list`` endpoint, prints the request URL, and writes the
    UTF-8-decoded response body to ``moviessqw<page>.json`` (a relative
    path).

    Args:
        page: Page number; page 1 maps to ``start=0``. It is also used in
            the output file name.
    """
    query = urllib.parse.urlencode({
        'start': (page - 1) * 20,
        'limit': 20,
    })
    url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&' + query
    print(url)
    headers = {
        # Browser-like User-Agent; presumably required for the site to
        # serve the request -- TODO confirm.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62',
    }
    request = urllib.request.Request(url=url, headers=headers)

    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(request) as response:
        content = response.read().decode('utf-8')

    # The with-block guarantees the file is flushed and closed even if
    # write() raises.
    with open('moviessqw' + str(page) + '.json', 'w', encoding='utf-8') as out_file:
        out_file.write(content)
# Download pages 1 through 10, one request and one output file per page.
for page_number in range(1, 11):
    getMovieTest(page_number)
浙公网安备 33010602011771号