From 73e8833bd095fa69c2dde84a4ef5c0378f2345eb Mon Sep 17 00:00:00 2001 From: sophon Date: Wed, 30 Jul 2025 10:43:47 +0800 Subject: [PATCH] modify scripts --- scrapy_proj/scrapy_proj/extensions/failure_monitor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapy_proj/scrapy_proj/extensions/failure_monitor.py b/scrapy_proj/scrapy_proj/extensions/failure_monitor.py index cf9e105..f0e0bf7 100644 --- a/scrapy_proj/scrapy_proj/extensions/failure_monitor.py +++ b/scrapy_proj/scrapy_proj/extensions/failure_monitor.py @@ -47,7 +47,8 @@ class FailureMonitorExtension: # 从request.meta中获取丢弃理由 drop_reason = reason if reason else request.meta.get('_dropreason', '未知原因') spider.logger.warning(f"request_dropped on url: {request.url} | 原因: {drop_reason}") - self.calculate_failure(spider) + if 'duplicate' not in drop_reason: # 重复请求不计入 + self.calculate_failure(spider) ''' Sent when a spider callback generates an error (i.e. raises an exception).