# -*- coding: utf-8 -*-
import requests
import re
import os
# Download the chapters of one novel from yibige.cc and save each chapter as a
# UTF-8 text file named after its title inside ``save_dir``.

# Directory that receives one .txt file per chapter.
save_dir = '青春无悔'
# exist_ok avoids the check-then-create race of exists() followed by mkdir().
os.makedirs(save_dir, exist_ok=True)

url = 'https://www.yibige.cc/110006/index.html'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3877.400 QQBrowser/10.8.4506.400'
}
# Seconds to wait on a request before giving up; without a timeout a stalled
# connection would hang the script forever.
timeout = 10

# Patterns are compiled once here instead of being re-resolved per chapter.
chapter_link_re = re.compile(r'<dd><a href="(.*?)">.*?</a></dd>', re.S)
title_re = re.compile(r'<h1>(.*?)</h1>', re.S)
content_re = re.compile(r'<div id="content" class="contentjs">(.*?)</div>', re.S)
# Characters that are illegal in Windows file names or act as path separators,
# plus line breaks/tabs that the DOTALL title match could capture.
unsafe_filename_re = re.compile(r'[\\/:*?"<>|\r\n\t]')

response_1 = requests.get(url=url, headers=headers, timeout=timeout)
# Stop early on an HTTP error rather than silently scraping an error page.
response_1.raise_for_status()
# Let requests guess the page encoding from the body.
response_1.encoding = response_1.apparent_encoding
html_data = response_1.text

# Chapter hrefs from the index page, capped at the first 525 entries.
result_list = chapter_link_re.findall(html_data)[:525]

for result_name in result_list:
    # Build the chapter URL from the href found on the index page.
    all_url = 'https://www.yibige.cc/110006/' + result_name

    # Send the same headers as for the index page, and skip (rather than
    # abort the whole run) when a single chapter cannot be fetched.
    try:
        response_2 = requests.get(all_url, headers=headers, timeout=timeout)
        response_2.raise_for_status()
    except requests.RequestException as exc:
        print('下载失败:', all_url, exc)
        continue

    # Detect the encoding from this chapter's own body; the index page does
    # not need to be downloaded again for that.
    response_2.encoding = response_2.apparent_encoding
    html_data_2 = response_2.text

    # Chapter title and chapter body.
    title_match = title_re.search(html_data_2)
    content_match = content_re.search(html_data_2)
    if title_match is None or content_match is None:
        # Unexpected page layout: report it and move on to the next chapter.
        print('解析失败:', all_url)
        continue
    title = title_match.group(1)

    # Turn the paragraph markup into plain text and drop the trailing ad
    # script. NOTE(review): the first replacement appeared as a space-to-space
    # no-op in the original; presumably it was meant to normalise non-breaking
    # spaces, so both the entity and the U+00A0 character are handled here --
    # confirm against a real page.
    txt = (
        content_match.group(1)
        .replace('&nbsp;', ' ')
        .replace('\xa0', ' ')
        .replace('</p><p>', '\n')
        .replace('<p>', '')
        .replace("</p><script>site_con_ad('亿笔阁','https://www.yibige.cc');</script>", '')
    )

    # Save the chapter. os.path.join keeps the path portable, and the title is
    # sanitised so it cannot escape the directory or be rejected by the OS.
    file_name = unsafe_filename_re.sub('_', title).strip() + '.txt'
    with open(os.path.join(save_dir, file_name), mode='w', encoding='utf-8') as f:
        f.write(txt)
    print('下载成功:', title)