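Skip pages on 404/login/url status codes; return http_code_login from fetch_page when a redirected page fails validation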

2025-07-02 09:05:59 +08:00
parent 8cd0a67b64
commit f1a9287834
2 changed files with 8 additions and 3 deletions
--- a/iafd/src/fetch.py
+++ b/iafd/src/fetch.py
@ -241,7 +241,7 @@ def fetch_movies_by_dist():
                else :
                    logging.warning(f'parse_page_movie error. url: {url}')
                    time.sleep(1)
-            elif status_code  and status_code == 404:
+            elif status_code  and status_code in [scraper.http_code_404, scraper.http_code_login, scraper.http_code_url]:
                logging.warning(f'fetch page error. httpcode: {status_code}, url: {url}, Skiping...')
                break
            else:
@ -281,7 +281,7 @@ def fetch_movies_by_stu():
                else :
                    logging.warning(f'parse_page_movie error. url: {url}')
                    time.sleep(1)
-            elif status_code  and status_code == 404:
+            elif status_code  and status_code in [scraper.http_code_404, scraper.http_code_login, scraper.http_code_url]:
                logging.warning(f'fetch page error. httpcode: {status_code}, url: {url}, Skiping...')
                break
            else:
--- a/iafd/src/iafd_scraper.py
+++ b/iafd/src/iafd_scraper.py
@ -70,7 +70,7 @@ def fetch_page(url, validator, max_retries=3, parser="html.parser", preprocessor
                return None, http_code_404  # 直接返回 404，调用方可以跳过
            
            response.raise_for_status()  # 处理 HTTP 错误
-
+                    
            # 过期的网页，与404相同处理
            if "invalid or outdated page" in response.text.lower():
                logging.debug(f"invalid or outdated page: {url}")
@ -85,6 +85,11 @@ def fetch_page(url, validator, max_retries=3, parser="html.parser", preprocessor
            soup = BeautifulSoup(html_text, parser)
            if validator(soup):  # 进行自定义页面检查
                return soup, response.status_code
+            else:
+                # 检查是否发生跳转，比如到登录页面
+                if response.history:
+                    logging.warning(f"Page redirected on {url}. Validation failed.")
+                    return None, http_code_login

            logging.warning(f"Validation failed on attempt {attempt + 1} for {url}")
        except cloudscraper.exceptions.CloudflareChallengeError as e: