斗破苍穹python爬虫

# -*- coding: utf-8 -*-
#@Time : 2022/5/28 17:30
#@Author : huaobin
#@File : doupo.py
#@Software: PyCharm

import requests
import re
import time
from bs4 import BeautifulSoup
import os,sys

# Module-level counter; it is not read or updated anywhere in the visible code.
count=0
# Path of a single combined output file. get_info() does not use it: it builds
# its own per-chapter path from the chapter title instead.
spath="./斗破苍穹/doupo.txt"
# Disabled code, kept for reference:
#f = open(path,'a+',encoding='utf-8')
def get_info(url):
    """Download one chapter page and append its text to a per-chapter file.

    The page at ``url`` is fetched and decoded as GB2312. The chapter title is
    taken from ``div.bookname h1`` and the body from ``div#content``. The
    title, a blank line and the body text are appended to
    ``./斗破苍穹/<title>.txt``. Nothing is written when the response status is
    not 200 or when the page has no title element.

    Args:
        url: Address of the chapter page to fetch.

    Raises:
        requests.RequestException: If the request fails or times out.
        OSError: If the output directory or file cannot be created or opened.
    """
    print(url)
    # A timeout keeps one stalled connection from hanging the whole crawl.
    res = requests.get(url, timeout=10)
    res.encoding='GB2312'
    if res.status_code != 200:
        return

    soup = BeautifulSoup(res.text, 'html.parser')
    titles = soup.select("div.bookname h1")
    if not titles:
        # No chapter title on the page, so there is nothing to name the file after.
        return

    title = titles[0].text
    print(title)

    # Replace characters that are path separators or are rejected in file
    # names on common platforms, so open() cannot fail because of the title.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', title)
    out_dir = "./斗破苍穹/"
    # The original assumed the directory already existed; create it on demand.
    os.makedirs(out_dir, exist_ok=True)
    path = out_dir + safe_title + ".txt"

    # The context manager guarantees the file is flushed and closed, even if a
    # write raises part-way through.
    with open(path, 'a+', encoding='utf-8') as f:
        f.write(title + '\n')
        f.write('\n')
        for con in soup.select("div#content"):
            f.write(con.text)

if __name__ == '__main__':
    # Chapter pages are addressed by a numeric id in the URL; this range covers
    # only the first few ids (the highest page id is 10317000).
    urls = ['https://www.qbiqu.com/18_18902/{}.html'.format(str(i)) for i in range(10316495,10316500)]

    # The loop must live inside the guard: `urls` only exists when the file is
    # run as a script, so a top-level loop would fail on import.
    for url in urls:
        get_info(url)
        # Pause between requests so the crawl does not hammer the site.
        time.sleep(1)

  

posted @ 2022-05-28 08:00  青竹之下  阅读(161)  评论(0)    收藏  举报