# Patch summary (commentary only; everything from "diff --git" onward is the
# original patch text, unchanged).
#
# Target: scrapy_proj/scrapy_proj/settings.py, one hunk at line 86
# (7 lines before the change, 14 after).
#
# What the hunk does:
#   * Turns the active setting `HTTPERROR_ALLOWED_CODES = [404]` into a
#     commented-out line.
#   * Adds `HTTPERROR_ALLOW_ALL = True`.
#   * Adds RETRY_ENABLED = True, RETRY_TIMES = 5,
#     RETRY_HTTP_CODES = [500, 502, 503, 504, 521, 522, 524],
#     RETRY_BACKOFF_BASE = 3 and RETRY_BACKOFF_MAX = 60.
#
# English translation of the non-English comments carried in the hunk:
#   "本地shell"                          -> "local shell"
#   "适用于Linux"                        -> "for Linux"
#   "允许 404 状态码被 Spider 处理"      -> "allow 404 status codes to be
#                                           handled by the Spider"
#   "允许 Spider 接收所有非 200 响应"    -> "allow the Spider to receive all
#                                           non-200 responses"
#
# NOTE(review): the "allow 404 ..." comment is kept as a context line directly
#   above the new block, so after this patch it describes a setting that is
#   commented out. Consider dropping both it and the commented-out
#   HTTPERROR_ALLOWED_CODES line in a follow-up.
# NOTE(review): RETRY_HTTP_CODES as written does not list 408 or 429, which
#   are believed to be in Scrapy's default list; combined with
#   HTTPERROR_ALLOW_ALL those responses would presumably reach the spider
#   without a retry. Confirm this is intended.
# NOTE(review): RETRY_BACKOFF_BASE / RETRY_BACKOFF_MAX do not look like
#   built-in Scrapy settings; presumably a project middleware reads them.
#   Verify, otherwise they have no effect.
diff --git a/scrapy_proj/scrapy_proj/settings.py b/scrapy_proj/scrapy_proj/settings.py index 1a4e466..7281e84 100644 --- a/scrapy_proj/scrapy_proj/settings.py +++ b/scrapy_proj/scrapy_proj/settings.py @@ -86,7 +86,14 @@ STATS_EXPORT_SCRIPT = 'scrapy_proj/extensions/push_to_wecom.sh' # 本地shell TWISTED_REACTOR = 'twisted.internet.epollreactor.EPollReactor' # 适用于Linux # 允许 404 状态码被 Spider 处理 -HTTPERROR_ALLOWED_CODES = [404] +#HTTPERROR_ALLOWED_CODES = [404] +# 允许 Spider 接收所有非 200 响应 +HTTPERROR_ALLOW_ALL = True +RETRY_ENABLED = True +RETRY_TIMES = 5 +RETRY_HTTP_CODES = [500, 502, 503, 504, 521, 522, 524] +RETRY_BACKOFF_BASE = 3 +RETRY_BACKOFF_MAX = 60 # Crawl responsibly by identifying yourself (and your website) on the user-agent #USER_AGENT = "scrapy_proj (+http://www.yourdomain.com)"