# 1. 创建项目: scrapy startproject dushuproject
# 2. 跳转到 spiders 路径: cd dushuproject\dushuproject\spiders
# 3. 创建爬虫类: scrapy genspider read www.dushu.com
import scrapy
from readPro.items import ReadproItem
class ReadnetSpider(scrapy.Spider):
    """Spider that walks numbered listing pages on www.dushu.com.

    Each response is scanned for ``<img>`` elements inside the
    ``bookslist`` container; one ``ReadproItem`` is yielded per image,
    followed by at most one request for the next numbered page.
    """

    name = 'readNet'
    allowed_domains = ['www.dushu.com']
    start_urls = ['https://www.dushu.com/book/1179_1.html']

    # Prefix of every listing URL; the page number and '.html' are appended.
    base_url = 'https://www.dushu.com/book/1179_'
    # Number of the page most recently scheduled; advanced inside parse().
    page = 1

    def parse(self, response):
        """Yield one item per listed image, then a request for the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            ``ReadproItem`` objects carrying ``src`` and ``name``, and then,
            while the page counter is still below 101, a ``scrapy.Request``
            for the following page with this method as its callback.
        """
        print("读书网")

        for node in response.xpath('//div[@class="bookslist"]//li//img'):
            # Either attribute may be absent, in which case the value is None.
            cover_url = node.xpath('./@data-original').extract_first()
            title = node.xpath('./@alt').extract_first()
            print(cover_url, title)
            yield ReadproItem(src=cover_url, name=title)

        # Stop following pages once the counter has reached the last one.
        if self.page >= 101:
            return

        self.page += 1
        next_url = f"{self.base_url}{self.page}.html"
        yield scrapy.Request(url=next_url, callback=self.parse)