A simple Scrapy spider for 最大资源网 (zuidazy5.net): change one category ID in the code to scrape whichever category you want, then open the collected links directly on your computer or phone.

 

First, create the spider.

Create item_spider.py under the project's spiders directory and add the following code:

"""
语言版本:

python:3.6.1
scrapy:1.3.3


"""

import scrapy
import re

class itemSpider(scrapy.Spider):
    name = 'niubi'
    # change the trailing 8 to the ID of the category you want to scrape
    start_urls = ['http://www.zuidazy5.net/?m=vod-type-id-8']


    def parse(self, response):
        # queue a request for every detail page linked from the current listing page
        detail_links = response.xpath("//span[@class='xing_vb4']//@href").extract()
        for href in detail_links:
            yield scrapy.Request(response.urljoin(href), callback=self.get_title)

        # follow the "下一页" (next page) link, if present, and parse it the same way
        next_pages = response.xpath('//a[@target="_self"][text()="下一页"]//@href').extract()
        for href in next_pages:
            yield scrapy.Request(response.urljoin(href), callback=self.parse)



    def get_title(self, response):
        # the detail block is the 5th top-level <div> on the page
        detail = response.xpath('/html/body/div[5]')
        title = detail.xpath("//div[@class='vodh']//h2//text()").extract_first()
        play_text = detail.xpath('//*[@id="play_1"]/ul/li/text()').extract_first()
        if not title or not play_text:
            return

        # pull every m3u8 playback URL out of the play-list text
        links = ','.join(re.findall(r'[a-zA-Z]+://\S+?\.m3u8', play_text))

        file_name = '科幻片.txt'  # output file; rename it to match your category ('科幻片' = sci-fi)
        with open(file_name, 'a+', encoding='utf-8') as f:  # append one line per title
            f.write(title + ',' + links + '\n')
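The pattern r'[a-zA-Z]+://\S+?\.m3u8' grabs every m3u8 playback address from the play-list text. A quick standalone check with a made-up play-list line (the real text inside #play_1 may be formatted differently):

import re

# hypothetical sample of the text inside the #play_1 list item
sample = 'HD$https://example.com/20200922/abcd1234/index.m3u8'

print(re.findall(r'[a-zA-Z]+://\S+?\.m3u8', sample))
# ['https://example.com/20200922/abcd1234/index.m3u8']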

Nothing else needs to change; just run the spider and it will scrape the category you picked.
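The standard way to run it is scrapy crawl niubi from the project root. If you would rather launch it from a plain Python script, here is a minimal sketch using Scrapy's CrawlerProcess; the package name myproject and the file name run.py are placeholders for your own project layout:

# run.py -- hypothetical launcher script placed in the project root
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

# adjust 'myproject' to the actual package name of your Scrapy project
from myproject.spiders.item_spider import itemSpider

process = CrawlerProcess(get_project_settings())
process.crawl(itemSpider)
process.start()  # blocks until the crawl is finished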

 
