# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
import xlrd,xlwt
# Board page URLs, one per offset 0, 10, ..., 90 (ten pages in total;
# presumably ten films per page -- confirm against the site).
urls = list(
    map("https://maoyan.com/board/4?offset={}".format, range(0, 100, 10))
)
# HTTP headers sent with every page request; the user-agent value is a
# desktop Chrome identification string assembled from its product tokens.
header = {
    "user-agent": " ".join(
        (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/79.0.3945.88",
            "Safari/537.36",
        )
    ),
}
# Fields collected per film: title, starring, release time, country, score
def _first_text(node, selector):
    """Return the text of the first element under *node* matching *selector*.

    ``select()`` returns a list of matches, hence the ``[0]``. An empty
    string is returned when nothing matches, so an entry that lacks one
    field does not abort the whole page with an IndexError.
    """
    matches = node.select(selector)
    return matches[0].text if matches else ""


def _after_label(text):
    """Return what follows the leading ``label:`` part of *text*.

    Both the full-width colon (U+FF1A) and the ASCII colon are accepted as
    the separator. Everything after the first colon is kept. When no colon
    is present the stripped text is returned unchanged.
    """
    cleaned = text.strip().replace("\uff1a", ":")
    _, colon, value = cleaned.partition(":")
    return value if colon else cleaned


def _split_release(value):
    """Split a release string such as ``1993-01-01(country)`` into its parts.

    Returns a ``(date, country)`` tuple. The country is whatever follows the
    first opening parenthesis with parentheses removed, so the date may have
    any length. When no country is present the placeholder "(暂无)" is used.
    """
    date, paren, rest = value.partition("(")
    country = rest.replace("(", "").replace(")", "").strip() if paren else ""
    return date, country or "(暂无)"


def FilmInformation(url):
    """Download one board page and extract a record for every film on it.

    Args:
        url: Address of the page to download.

    Returns:
        A list containing one ``[name, starring, release_time, country,
        score]`` list of strings for each ``.board-item-main`` element in
        the page. The list is empty when the page has no such element.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get``, e.g. when the 10-second timeout expires.
    """
    # A timeout keeps an unresponsive server from blocking the run forever.
    response = requests.get(url, headers=header, timeout=10)
    soup = BeautifulSoup(response.text, "html.parser")

    content = []
    for film in soup.select(".board-item-main"):
        # Original author's note: film.select(".name a") also yields the title.
        name = _first_text(film, "[title]")
        staring = _after_label(_first_text(film, ".star"))
        releasetime, country = _split_release(
            _after_label(_first_text(film, ".releasetime"))
        )
        # The score is rendered as two elements: integer part and fraction.
        score = _first_text(film, ".integer") + _first_text(film, ".fraction")
        content.append([name, staring, releasetime, country, score])
    return content
def WriteExcel(data, filename="maoyan.xls"):
    """Write scraped film records to an ``.xls`` workbook.

    Row 0 receives the column headings; every film record then occupies
    one row, starting at row 1, with its values written left to right.

    Args:
        data: Iterable of pages, where each page is an iterable of film
            records and each record is a sequence of cell values.
        filename: Path of the workbook to save. Defaults to "maoyan.xls",
            the location that was previously hard-coded.
    """
    title = ["电影", "主演", "时间", "国家", "评分"]
    workbook = xlwt.Workbook(encoding="utf-8")
    sheet = workbook.add_sheet("猫眼前100")

    for col, heading in enumerate(title):
        sheet.write(0, col, heading)

    row = 1  # row 0 holds the headings
    for page in data:
        for record in page:
            for col, value in enumerate(record):
                sheet.write(row, col, value)
            row += 1

    workbook.save(filename)
def main():
    """Scrape every address in ``urls`` and hand the results to ``WriteExcel``."""
    # One list of film records per page, in the same order as ``urls``.
    pages = [FilmInformation(page_url) for page_url in urls]
    WriteExcel(pages)
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()