练习1---requests+正则(猫眼电影)

`import requests
import re
import io
import sys
from requests.exceptions import RequestException

# Re-wrap the raw stdout byte stream in a UTF-8 text writer so that print()
# encodes its output as UTF-8 instead of the stream's previous encoding.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

def get_page_one(url):
    """Download *url* and return the response body as text.

    A desktop-browser ``User-Agent`` header is sent with the request.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body, or ``None`` when the request fails
        (connection error, timeout, or an HTTP error status).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36"
    }
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(url, headers=headers, timeout=10)
        # 4xx/5xx responses raise HTTPError, a RequestException subclass,
        # so error pages are not handed to the parser as if they were data.
        response.raise_for_status()
    except RequestException:
        # Signal failure with None instead of returning the exception object,
        # which a caller could mistake for page content.
        return None
    return response.text

def parse_one_page(html):
    """Extract ranking entries from the HTML of a Maoyan board page.

    Args:
        html: Page markup as a string. Any non-string value (for example
            ``None`` after a failed download) produces no items.

    Yields:
        One dict per matched entry with the string-valued keys ``'index'``,
        ``'title'`` and ``'actor'``; the ``'actor'`` value has surrounding
        whitespace stripped.
    """
    if not isinstance(html, str):
        # Nothing to parse; avoids a TypeError from the regex engine.
        return

    # NOTE(review): the HTML tags and the '*' quantifiers of this pattern were
    # lost in the pasted source and have been reconstructed; confirm against
    # the live page markup.
    pattern = re.compile(
        r'<dd>.*?board-index.*?>(\d+)</i>'
        r'.*?name.*?<a.*?>(.*?)</a>'
        r'.*?star">(.*?)</p>.*?</dd>',
        re.S,  # let '.' span newlines: each <dd> entry covers several lines
    )

    for match in pattern.finditer(html):
        index, title, actor = match.groups()
        yield {
            'index': index,
            'title': title,
            'actor': actor.strip(),
        }

def main():
    """Fetch the hard-coded Maoyan board page and print each parsed entry."""
    url = 'https://maoyan.com/board/4'
    html = get_page_one(url)
    # parse_one_page is a generator, so entries are printed as they are found.
    for item in parse_one_page(html):
        print(item)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

posted @ 2020-11-21 11:06  tingshu  阅读(72)  评论(0)  编辑  收藏  举报