modify scripts
This commit is contained in:
@ -41,6 +41,15 @@ def fetch_page(url, validator, max_retries=3, parser="html.parser", preprocessor
|
|||||||
|
|
||||||
response.raise_for_status() # 处理 HTTP 错误
|
response.raise_for_status() # 处理 HTTP 错误
|
||||||
|
|
||||||
|
# 检查是否发生跳转,比如到登录页面
|
||||||
|
if response.history:
|
||||||
|
logging.debug(f"Page redirected on {url}. Checking if it's a login page.")
|
||||||
|
soup = BeautifulSoup(response.text, parser)
|
||||||
|
# Determine whether this is the login page (identified by its 'panel form-panel' nav element)
|
||||||
|
if soup.find('nav', class_='panel form-panel'):
|
||||||
|
logging.warning(f"Page redirected to login page on {url}.")
|
||||||
|
return None, 404
|
||||||
|
|
||||||
# 预处理 HTML(如果提供了 preprocessor)
|
# 预处理 HTML(如果提供了 preprocessor)
|
||||||
html_text = preprocessor(response.text) if preprocessor else response.text
|
html_text = preprocessor(response.text) if preprocessor else response.text
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user