modify scripts

This commit is contained in:
2025-03-16 15:51:17 +08:00
parent 6a89b25792
commit f45ad35c6a

View File

@@ -47,14 +47,14 @@ def fetch_page(url, validator, max_retries=3, parser="html.parser", preprocessor
# 处理 HTTP 状态码
if response.status_code == 404:
-logging.warning(f"Page not found (404): {url}")
+logging.debug(f"Page not found (404): {url}")
return None, 404 # 直接返回 404调用方可以跳过
response.raise_for_status() # 处理 HTTP 错误
# 过期的网页与404相同处理
if "invalid or outdated page" in response.text.lower():
-logging.warning(f"invalid or outdated page: {url}")
+logging.debug(f"invalid or outdated page: {url}")
return None, 404 # 直接返回 404调用方可以跳过
# 预处理 HTML如果提供了 preprocessor