""""
title : Web crawler
dateTime : 2020-6-30
"""
# from urllib import request
# from urllib import request
# import urlopen,Request from urllib.request
from urllib.request import urlopen, Request
# from lxml import etree
from lxml import etree
import re
# import lxml
# Alternative target URL, currently disabled:
# weburl = "https://bz.zzzmh.cn/#people"
# URL of the page that crow() downloads and parses.
weburl = "https://movie.douban.com/top250"
def crow(i):
    """Download the page at ``weburl`` and print every movie entry on it.

    The page is fetched with a browser-like User-Agent header, decoded as
    UTF-8 and parsed with lxml. For each ``<li>`` under
    ``<ol class="grid_view">`` one line is printed containing the title,
    the rating and the short quote. Each of the three values is printed as
    the raw list returned by XPath, so a missing field shows up as ``[]``.

    Args:
        i: Currently unused. It is kept so that existing calls such as
            ``crow(1)`` keep working.

    Returns:
        None. The results are written to standard output.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Send a browser-like User-Agent instead of urllib's default one.
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'}
    request = Request(weburl, headers=headers)

    # The context manager closes the HTTP response (and its socket) even if
    # reading or decoding raises.
    with urlopen(request) as response:
        page_text = response.read().decode('utf-8')

    tree = etree.HTML(page_text)

    # Every movie is one <li> inside <ol class="grid_view">.
    for entry in tree.xpath('//ol[@class="grid_view"]/li'):
        title = entry.xpath('div/div[2]/div[@class="hd"]/a/span[1]/text()')
        rating = entry.xpath('div/div[2]/div[@class="bd"]/div[@class="star"]/span[@class="rating_num"]/text()')
        quote = entry.xpath('div/div[2]/div[@class="bd"]/p[@class="quote"]/span[@class="inq"]/text()')
        # Lists are printed as-is to keep the output format unchanged.
        print(title, end=" ,")
        print(rating, end=" ,简介:")
        print(quote)
# Run the crawl as soon as this file is executed or imported.
# The argument is not used by crow().
crow(1)