modify scripts
This commit is contained in:
@ -86,7 +86,14 @@ STATS_EXPORT_SCRIPT = 'scrapy_proj/extensions/push_to_wecom.sh' # 本地shell
|
|||||||
TWISTED_REACTOR = 'twisted.internet.epollreactor.EPollReactor' # 适用于Linux
|
TWISTED_REACTOR = 'twisted.internet.epollreactor.EPollReactor' # 适用于Linux
|
||||||
|
|
||||||
# 允许 404 状态码被 Spider 处理
|
# 允许 404 状态码被 Spider 处理
|
||||||
HTTPERROR_ALLOWED_CODES = [404]
|
#HTTPERROR_ALLOWED_CODES = [404]
|
||||||
|
# 允许 Spider 接收所有非 200 响应
|
||||||
|
HTTPERROR_ALLOW_ALL = True
|
||||||
|
RETRY_ENABLED = True
|
||||||
|
RETRY_TIMES = 5
|
||||||
|
RETRY_HTTP_CODES = [500, 502, 503, 504, 521, 522, 524]
|
||||||
|
RETRY_BACKOFF_BASE = 3
|
||||||
|
RETRY_BACKOFF_MAX = 60
|
||||||
|
|
||||||
# Crawl responsibly by identifying yourself (and your website) on the user-agent
|
# Crawl responsibly by identifying yourself (and your website) on the user-agent
|
||||||
#USER_AGENT = "scrapy_proj (+http://www.yourdomain.com)"
|
#USER_AGENT = "scrapy_proj (+http://www.yourdomain.com)"
|
||||||
|
|||||||
Reference in New Issue
Block a user