scrapy电影天堂练习

movie.py

import scrapy
from movieProject.items import MovieprojectItem


class MovieSpider(scrapy.Spider):
    """Spider that walks the ygdy8.net domestic-movie listing page.

    For every movie link found on the listing page it requests the
    detail page, then yields a ``MovieprojectItem`` holding the movie
    name (taken from the listing) and the first image ``src`` found
    inside the detail page's ``div#Zoom``.
    """

    name = 'movie'
    allowed_domains = ['www.ygdy8.net']
    start_urls = ['https://www.ygdy8.net/html/gndy/china/index.html']

    def parse(self, response):
        """Parse the listing page and schedule one request per movie.

        Yields:
            scrapy.Request: a request for each movie's detail page, with
            the movie name carried along in ``meta['movieName']``.
        """
        print("电影天堂")
        movieList = response.xpath('//table//tr[2]/td[2]/b/a[2]')
        for item in movieList:
            movieName = item.xpath('./text()').extract_first()
            href = item.xpath('./@href').extract_first()
            if href is None:
                # An anchor without href cannot be followed; concatenating
                # None onto the site prefix would raise TypeError.
                continue
            # urljoin resolves the href against the page URL, so both
            # site-relative and absolute hrefs produce a valid URL.
            movieUrl = response.urljoin(href)
            print(movieName, movieUrl)
            # Pass movieName to second_parse via meta. The request must be
            # yielded inside the loop so every movie is followed.
            yield scrapy.Request(
                url=movieUrl,
                callback=self.second_parse,
                meta={'movieName': movieName},
            )

    def second_parse(self, response):
        """Parse a movie detail page and yield the finished item.

        Reads the movie name from ``response.meta['movieName']`` (set by
        ``parse``) and pairs it with the first image address found under
        ``div#Zoom``. ``movieUrl`` is ``None`` when no image is present.

        Yields:
            MovieprojectItem: one item per detail page.
        """
        print("二次解析之前")
        # Grab the poster image address from the detail page.
        secondUrl = response.xpath('//div[@id="Zoom"]//img/@src').extract_first()
        print("第二次访问", secondUrl)
        movieName = response.meta['movieName']
        movie = MovieprojectItem(movieName=movieName, movieUrl=secondUrl)
        yield movie

  items.py

class MovieprojectItem(scrapy.Item):
    """Scrapy item describing one scraped movie.

    Fields:
        movieName: the movie's name.
        movieUrl: a URL associated with the movie (the spider in this
            project stores the detail-page image address here).
    """

    movieName = scrapy.Field()
    movieUrl = scrapy.Field()

  pipelines.py

class MovieprojectPipeline:
    """Item pipeline that stores every scraped item in ``movie.json``.

    The file is written as a single JSON array with one object per item,
    so it can be loaded back with ``json.load``. (Previously the raw
    ``str(item)`` of every item was written back-to-back with no
    separator, which is not valid JSON.)
    """

    def open_spider(self, spider):
        """Open the output file and start the JSON array."""
        self.fp = open('movie.json', 'w', encoding='utf-8')
        # Tracks whether a separator is needed before the next item.
        self._first_item = True
        self.fp.write('[')

    def process_item(self, item, spider):
        """Append *item* to the JSON array and pass it on unchanged.

        Args:
            item: a mapping-like item (e.g. a ``scrapy.Item`` or dict).
            spider: the spider that produced the item (unused).

        Returns:
            The same *item*, so later pipelines can keep processing it.
        """
        import json  # local import: the file's import block is not in view

        if not self._first_item:
            self.fp.write(',')
        self._first_item = False
        # ensure_ascii=False keeps Chinese movie names human-readable.
        self.fp.write('\n' + json.dumps(dict(item), ensure_ascii=False))
        return item

    def close_spider(self, spider):
        """Terminate the JSON array and close the output file."""
        fp = getattr(self, 'fp', None)
        if fp is None:
            # open_spider never ran or failed; nothing to close.
            return
        fp.write('\n]\n')
        fp.close()

  

posted @ 2023-10-05 09:48  sgj191024  阅读(84)  评论(0)    收藏  举报