爬取起点
查看源代码

标签

#获取各书的id import re import requests def book_list(): url = 'https://www.qidian.com/rank' # 打开相应url并把页面作为返回 html = requests.get(url).text print(html) ren = r' </a><img src="//bookcover.yuewen.com/qdbimg/349573/1010868264/90">' ren_url = re.compile(ren) book_url = ren_url.findall(html) return book_url()

书名
#获取书名 import re import requests def book_name(): url = 'https://book.qidian.com/info/1010868264' html = requests.get(url).text ren = r' <h1>.*?<em>(.*?)</em>' ren_name = re.compile(ren) name = ren_name.findall(html) name = ".".join(name) print("书名:"+name) return name book_name()
作者
#获取作者 import re import requests def book_authorname(): url = 'https://book.qidian.com/info/1010868264' html = requests.get(url).text ren = r"authorName: '(.*?)'" ren_autorname = re.compile(ren) authorname = ren_autorname.findall(html) authorname = ".".join(authorname) print("作者:" + authorname) return authorname book_authorname()
导入数据库
db = pymysql.connect(host='localhost', port=3306, user='root', password='123', db='spider', charset='utf8') cursor = db.cursor() sql1 = "insert into bookspider(book_url,book_name,book_authorname) values ('%s','%s','%s')" % (bookurl[0],bookname[0],booksummary[0],bookid[0]) cursor.execute(sql1) db.commit() print("存入数据库成功!")

浙公网安备 33010602011771号