# coding: utf-8
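"""Download a web novel from biqugex.com by title.

A headless Chrome submits the biqusoso search form to find the book page,
then requests + BeautifulSoup scrape the chapter list and each chapter body
into per-chapter .txt files on the desktop.
"""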
import os
import re

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By  # Selenium 4 locator API

class downloader:

    def __init__(self):
        self.urls = []  # chapter links
        self.name = []  # chapter titles
        # biqusoso search endpoint, scoped to the biqugex.com site
        self.url = 'https://so.biqusoso.com/s.php?ie=utf-8&siteid=biqugex.com&q='

    def Get_url(self):
        """Prompt for the novel's full title, search for it, and return the book page URL."""
        # Configure Chrome to run headless (no visible window).
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        browser = webdriver.Chrome(options=chrome_options)
        browser.get(self.url)
        c = input('Enter the full title of the novel: ')
        # Selenium 4 removed find_element_by_xpath; use find_element(By.XPATH, ...).
        browser.find_element(By.XPATH, '//*[@id="wrapper"]/div[1]/div[2]/form/input[3]').send_keys(c)
        browser.find_element(By.XPATH, '//*[@id="wrapper"]/div[1]/div[2]/form/input[4]').click()
        new_url = browser.current_url
        # quit() closes every window and ends the chromedriver process;
        # calling close() first as well is redundant.
        browser.quit()
        print('Browser closed')
        # Fetch the result page and take the first hit: it sits inside
        # <span class="s2"><a href="...">. Navigate the parsed tree directly
        # instead of re-parsing str(...) of a find_all() result.
        response = requests.get(new_url)
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'lxml')
        link = soup.find('span', class_='s2').find('a')
        self.href = link.attrs['href']
        print(self.href)
        return self.href
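
    # A lighter-weight alternative, sketched on the assumption that the search
    # form submits via a plain GET with parameter `q` (the query string baked
    # into self.url suggests so, but this is unverified). It reuses the same
    # result parsing as Get_url and needs no browser at all.
    def Get_url_no_browser(self, title):
        from urllib.parse import quote
        response = requests.get(self.url + quote(title))
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'lxml')
        link = soup.find('span', class_='s2').find('a')
        self.href = link.attrs['href']
        return self.href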

    def Response(self):
        """Fetch the book page and collect every chapter's title and link."""
        response = requests.get(self.href)
        response.encoding = 'gbk'  # the site serves gbk; this avoids mojibake
        self.soup = BeautifulSoup(response.text, 'lxml')
        # The chapter index lives under <div class="listmain">; each <a> holds
        # the chapter title as its text and a relative link in its href.
        listmain = self.soup.find('div', class_='listmain')
        for i in listmain.find_all('a'):
            self.name.append(i.string)
            self.urls.append('https://www.biqugex.com%s' % i.get('href'))

    def file(self):
        """Create a folder named after the novel and save one .txt per chapter."""
        # The book title sits in <div class="info"><h2>.
        h2 = self.soup.select_one('body > div.book > div.info > h2')
        book_name = h2.string
        folder = 'C:\\Users\\Administrator\\Desktop\\%s' % book_name
        if not os.path.exists(folder):
            os.mkdir(folder)

        # Fetch each chapter page and write its body text to a file.
        for title, url in zip(self.name, self.urls):
            response1 = requests.get(url)
            response1.encoding = 'gbk'
            soup2 = BeautifulSoup(response1.text, 'lxml')
            content = soup2.find('div', id='content')
            # Drop characters Windows forbids in filenames, in case a chapter
            # title contains one.
            src = re.sub(r'[\\/:*?"<>|]', '', title) + '.txt'
            filename = os.path.join(folder, src)
            print(filename)
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(content.text)

    # If the search fails (no result, or the page layout changed), report it
    # instead of letting the traceback escape.
    def Main(self):
        try:
            self.Get_url()
        except Exception:
            print('Novel not found')
        else:
            self.Response()
            self.file()
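
    # A direct-URL entry point, sketched from the commented-out lines under
    # __main__ below; it skips the Selenium search and feeds a known catalogue
    # URL (e.g. a biqugex.com /book_xxx/ page) straight to Response()/file().
    def Main_from_url(self, url):
        self.href = url
        self.Response()
        self.file()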


if __name__ == '__main__':
    # url = input('Enter a URL: ')
    # url = 'https://www.biqugex.com/book_104027/'
    a = downloader()
    a.Main()
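
# Usage sketch (assumption: a chromedriver matching the local Chrome is
# available to Selenium; Selenium 4.6+ can also fetch one itself via
# Selenium Manager):
#   pip install requests beautifulsoup4 lxml selenium
# then run this script and type the novel's full title when prompted.
# The save folder is hard-coded to the Administrator desktop above; adjust
# it for your machine.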