爬虫练习【三方Pixiv】

网页链接
使用 Scrapy 框架

class Huashi6Spider(scrapy.Spider):
    """Spider that collects image URLs from the huashi6.com ranking endpoint.

    Requests ``rank_page`` for indexes 1-5 of every day from 2021-9-1 to
    2021-9-30 and yields one ``ChuzhanItem`` for each ``path`` value matched
    in the response text.
    """

    name = 'huashi6'
    allowed_domains = ['huashi6.com']
    # NOTE(review): presumably a misspelling of "month"; nothing in this class
    # reads it. Kept under its original name for backward compatibility.
    mouth = 9

    # URL template (index, month, day). Not used by the code in this class.
    base_url = 'https://rt.huashi6.com/front/works/rank_page?index={}&size=10&date=2021-{}-{}'
    # The URLs are written out as a literal f-string: names bound in a class
    # body (such as base_url) are not visible inside a comprehension's element
    # expression, so base_url.format(...) would raise NameError here.
    start_urls = [
        f'https://rt.huashi6.com/front/works/rank_page?index={index}&size=10&date=2021-9-{day}'
        for day in range(1, 31)
        for index in range(1, 6)
    ]
    # Debug output emitted once, when the class body is executed.
    print(start_urls)

    def parse(self, response):
        """Yield one item per ``path`` value found in the response text.

        Args:
            response: The downloaded response for one of ``start_urls``.

        Yields:
            A new ``items.ChuzhanItem`` whose ``url`` field is the matched
            path prefixed with ``https://img2.huashi6.com/``.
        """
        # Debug trace showing that the callback was reached.
        print('HI')
        image_paths = re.findall(r'path":"(.*?)","width', response.text)

        for image_path in image_paths:
            # Build a fresh item for every match. Re-yielding one shared item
            # would let a later assignment overwrite the url of items that
            # were already handed on to downstream consumers.
            item = items.ChuzhanItem()
            item['url'] = 'https://img2.huashi6.com/' + image_path
            yield item

版权声明:仅作学习交流用途,侵权立删

posted @ 2021-10-02 01:29  Timor88  阅读(152)  评论(0)    收藏  举报