A simple Scrapy spider for scraping 最大资源网 (zuidazy5.net), so you can watch directly on your computer or phone. Change the category you want in the code.

First, create the itemSpider.
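If you do not have a Scrapy project yet, a minimal setup might look like this (the project name zuida is only an example, use whatever you like):

scrapy startproject zuida
cd zuida
# the spider file below goes into zuida/spiders/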
Create item_spider.py in the spiders directory and add the following:
""" 语言版本: python:3.6.1 scrapy:1.3.3 """ import scrapy import re from scrapy.http import Request class itemSpider(scrapy.Spider): name = 'niubi' start_urls = ['http://www.zuidazy5.net/?m=vod-type-id-8'] #8改成自己 想抓取的分类 def parse(self, response): urls = response.xpath("//span[@class='xing_vb4']//@href").extract() c = [] url1 = ['http://www.zuidazy5.net/'] for i in range(len(urls)): c1 = url1[0] + urls[i] c.append(c1) for url3 in c: yield scrapy.Request(url3, callback=self.get_title) next_page1 = response.xpath('//a[@target="_self"][text()="下一页"]//@href').extract() d = [] for i in range(len(next_page1)): d1 = url1[0] + next_page1[i] d.append(d1) for g in d: if d is not None: g = response.urljoin(g) yield scrapy.Request(g, callback=self.parse) def get_title(self, response): #item = IPpronsItem() mingyan = response.xpath('/html/body/div[5]') IP = mingyan.xpath("//div[@class='vodh']//h2//text()").extract_first() port = mingyan.xpath('//*[@id="play_1"]/ul/li/text()').extract_first() port = re.findall('[a-zA-Z]+://[^\s]*[.com|.cn]*[.m3u8]', port) # IP = ','.join(IP) port = ','.join(port) fileName = '科幻片.txt' # 爬取的内容存入文件 对应分类名字自己改 f = open(fileName, "a+", encoding='utf-8') # 追加写入文件 f.write(port+',') f.write('\n') f.write(IP+',') f.close()
Nothing else needs to change; just run the spider and it will scrape the category you chose.
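To run it, use Scrapy's crawl command from inside the project directory; niubi is the name defined in the spider above:

scrapy crawl niubi

The m3u8 links and titles are appended to 科幻片.txt (or whatever file name you set), created in the directory you run the command from.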