我找到了解决这个问题的方法!下面说明我的做法。
我是通过像这样处理蜘蛛(spider)的请求错误来实现的:
import time

import scrapy  # fix: original code used scrapy.Spider / scrapy.Request without importing scrapy


class mySpider(scrapy.Spider):
    """Spider that retries a failed request after a fixed delay.

    Every request is issued with ``handle_error`` as its errback; on
    failure the spider logs the error, waits 60 seconds, and re-issues
    the same URL.
    """

    name = "myspider"
    allowed_domains = ["google.com"]
    start_urls = [
        "http://www.google.com",
    ]

    def handle_error(self, failure):
        """Errback: log the failure, wait 60 s, then retry the request.

        :param failure: the twisted ``Failure`` passed to Scrapy errbacks;
            only ``failure.request`` is read here (for logging).

        NOTE(review): ``time.sleep(60)`` blocks the Twisted reactor, so the
        entire crawler stalls for the full minute. Consider Scrapy's
        RETRY_TIMES / RETRY_HTTP_CODES settings or a deferred-based delay
        instead — left as-is to preserve the author's behavior.
        """
        self.log("Error Handle: %s" % failure.request)
        self.log("Sleeping 60 seconds")
        time.sleep(60)
        url = 'http://www.google.com'
        # dont_filter=True lets the retried URL pass the duplicate filter,
        # which would otherwise drop a request for an already-seen URL.
        yield scrapy.Request(url, self.parse, errback=self.handle_error,
                             dont_filter=True)

    def start_requests(self):
        """Issue the initial request, wiring handle_error as its errback."""
        url = 'http://www.google.com'
        yield scrapy.Request(url, self.parse, errback=self.handle_error)
关键点在于重试请求时传入的 `dont_filter=True` 参数(绕过去重过滤器,否则重复 URL 会被丢弃),以及 `errback=self.handle_error` 回调——请求失败时 Scrapy 会调用 `handle_error` 进行重试。