import requests
from multiprocessing import Pool
import re
from requests.exceptions import RequestException
import json
def get_one_page(url):
    """Fetch the raw HTML of *url*, or return None on any failure.

    Returns None both for non-200 responses and for network errors,
    so callers must check the result before parsing it.
    """
    try:
        # Without a timeout, requests.get can block forever on a
        # stalled connection, hanging the whole worker process.
        res = requests.get(url, timeout=10)
        if res.status_code == 200:
            return res.text
        return None
    except RequestException:
        return None
# Compiled once at import time; re.S lets '.' span newlines in the HTML.
# NOTE: the original pattern ended in '(/dd)' — a stray capture group
# matching the literal text '/dd'; the intended anchor is the closing
# '</dd>' tag. Raw strings avoid the invalid '\d' escape warning.
_DD_PATTERN = re.compile(
    r'<dd>.*?board-index.*?>(\d+)</i>.*?data-src="(.*?)".*?name"><a'
    r'.*?>(.*?)</a>.*?star">(.*?)</p>.*?releasetime">(.*?)</p>'
    r'.*?integer">(.*?)</i>.*?fraction">(.*?)</i>.*?</dd>', re.S)


def parse_one_page(html):
    """Yield one dict per movie entry found on a Maoyan board page.

    Keys are kept in Chinese because the JSON written to disk depends
    on them: 名次 rank, 图片 poster URL, 名称 title, 主演 starring,
    上映时间 release time, 评分 score (integer + fraction parts).
    """
    for m in _DD_PATTERN.finditer(html):
        rank, img, title, star, release, whole, frac = m.groups()
        yield {
            '名次': rank,
            '图片': img,
            '名称': title,
            # strip() removes surrounding whitespace; [3:] drops the
            # '主演：' ("starring:") prefix
            '主演': star.strip()[3:],
            # [5:] drops the '上映时间：' ("release time:") prefix
            '上映时间': release.strip()[5:],
            '评分': whole + frac.strip(),
        }
def wrire_to(content):
    """Append *content* as one JSON line to dianying.txt.

    Function name (typo and all) is kept unchanged so existing
    callers keep working.
    """
    with open('dianying.txt', 'a', encoding='utf-8') as f:
        # ensure_ascii=False keeps the Chinese keys human-readable.
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
    # The redundant f.close() was removed: the with-block closes the file.
def main(offset):
    """Scrape one board page at *offset*, printing and persisting each movie."""
    url = "https://maoyan.com/board/4?offset=" + str(offset)
    html = get_one_page(url)
    if html is None:
        # Fetch failed (network error or non-200): skip this page
        # instead of crashing parse_one_page with a None argument.
        return
    for item in parse_one_page(html):
        print(item)
        wrire_to(item)
if __name__ == '__main__':
    # Fan the 10 page offsets (0, 10, ..., 90) out across worker
    # processes. The context manager terminates the pool on exit;
    # previously the pool was never closed or joined.
    with Pool() as pool:
        pool.map(main, [i * 10 for i in range(10)])