Running a Scrapy spider repeatedly in Python
The script below uses Twisted's CrawlerRunner to run a spider in an endless loop, waiting one second between runs:

from twisted.internet import reactor, defer, task
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
from scrapy.utils.project import get_project_settings
import logging

# Log to the console
configure_logging()
# CrawlerRunner reads the project settings from settings.py
runner = CrawlerRunner(get_project_settings())

@defer.inlineCallbacks
def crawl():
    while True:
        logging.info("new cycle starting")
        yield runner.crawl("xxxxx")
        # Wait 1 s before the next run. time.sleep() would block the
        # Twisted reactor, so yield a non-blocking deferred delay instead.
        yield task.deferLater(reactor, 1, lambda: None)

crawl()
reactor.run()
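
Because the loop above never ends, the reactor runs until the process is killed. If you only want a fixed number of runs, bound the loop and stop the reactor after the last one. A minimal sketch under the same project setup; the cycles parameter and the count of 5 are hypothetical, and "xxxxx" remains a placeholder spider name:

from twisted.internet import reactor, defer, task
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
from scrapy.utils.project import get_project_settings
import logging

configure_logging()
runner = CrawlerRunner(get_project_settings())

@defer.inlineCallbacks
def crawl(cycles=5):  # hypothetical: run the spider 5 times, then exit
    for i in range(cycles):
        logging.info("cycle %d starting", i + 1)
        yield runner.crawl("xxxxx")
        yield task.deferLater(reactor, 1, lambda: None)
    reactor.stop()  # reachable here: shut down the event loop after the last run

crawl()
reactor.run()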
