import requests
from bs4 import BeautifulSoup
import time
import re
import json
import csv
# HTTP headers sent with every request: the Douban movie host, the "explore"
# page as Referer, and a desktop-Chrome User-Agent string.
header = {
    'Host': 'movie.douban.com',
    'Referer': 'https://movie.douban.com/explore',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
}

# Detail-page URLs collected while crawling.
urls = []

# CSV column titles: name, rating, director, actors, runtime.
tc = ['名字', '评分', '导演', '演员', '时长']

# Write the title row. The file is opened in append mode ('a+'), so the row is
# added to the end of whatever the file already contains on every run.
with open(
    'C:\\Users\\lenovo\\Desktop\\go1.csv',
    'a+',
    newline='',
    encoding='utf-8',
) as f:
    writers = csv.writer(f)
    writers.writerow(tc)
def _text_or_blank(tag):
    """Return ``tag.text``, or ``''`` when *tag* is ``None``.

    ``soup.find`` returns ``None`` when the page has no matching element;
    mapping that to an empty cell keeps one incomplete page from aborting the
    whole crawl with an ``AttributeError``.
    """
    return tag.text if tag is not None else ''


# Walk 25 result pages of 20 entries each (page_start = 0, 20, 40, ...).
# The address shown in the browser does not change when paging, so this JSON
# endpoint was looked up in the browser's developer tools.
for page in range(25):
    url = 'https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend&page_limit=20&page_start={}'.format(page * 20)
    # A timeout keeps a stalled connection from blocking the script forever.
    response = requests.get(url, headers=header, timeout=30)
    # The response body is JSON text; json.loads parses it into a dict.
    payload = json.loads(response.text)

    # Iterate over the entries actually returned instead of indexing 0..19,
    # so a page with fewer than 20 subjects does not raise IndexError.
    for subject in payload['subjects']:
        movie_url = subject['url']
        urls.append(movie_url)

        response = requests.get(movie_url, headers=header, timeout=30)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Pause after each detail-page request.
        time.sleep(10)

        name = soup.find('span', {'property': 'v:itemreviewed'})
        score = soup.find('strong', {'property': 'v:average'})
        # The first <span class="attrs"> on the page goes into the director column.
        daoyan = soup.find('span', {'class': 'attrs'})
        shijian = soup.find('span', {'property': 'v:runtime'})

        # Collect the text of every rel="v:starring" link found inside any
        # <span class="attrs">; the list starts empty for each movie.
        actors = []
        for attrs_span in soup.find_all('span', {'class': 'attrs'}):
            for actor_link in attrs_span.find_all('a', {'rel': 'v:starring'}):
                actors.append(actor_link.text)

        # Append one row per movie, reopening the file each time so rows that
        # were already written are on disk if a later request fails.
        with open('C:\\Users\\lenovo\\Desktop\\go1.csv', 'a+', newline='', encoding='utf-8') as f:
            writers = csv.writer(f)
            # The actor list is passed as a list object, so csv stores its
            # str() form in a single cell (same output format as before).
            row = [
                _text_or_blank(name),
                _text_or_blank(score),
                _text_or_blank(daoyan),
                actors,
                _text_or_blank(shijian),
            ]
            writers.writerow(row)