From c6ebe185be89d28019ada5cc47b2be31fa2a092b Mon Sep 17 00:00:00 2001 From: oscarz Date: Sun, 30 Mar 2025 14:35:58 +0800 Subject: [PATCH] modify scripts --- javdb/src/fetch.py | 5 ++++- javdb/src/scraper.py | 2 +- javdb/src/sqlite_utils.py | 8 ++++---- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/javdb/src/fetch.py b/javdb/src/fetch.py index 738b477..726f698 100644 --- a/javdb/src/fetch.py +++ b/javdb/src/fetch.py @@ -220,8 +220,11 @@ def fetch_movies_detail(): logging.warning(f'parse_page_movie error. url: {url}') elif status_code and status_code == 404: - movie_id = db_tools.insert_or_update_movie_404(title=title, href=url) + movie_id = db_tools.insert_or_update_movie_404(title=title, href=url, is_full_data=2) logging.warning(f'404 page. id: {movie_id}, title: ({title}), url: {url}, Skiping...') + elif status_code and status_code == 401: + movie_id = db_tools.insert_or_update_movie_404(title=title, href=url, is_full_data=3) + logging.warning(f'401 page(need login). id: {movie_id}, title: ({title}), url: {url}, Skiping...') else: logging.warning(f'fetch_page error. url: {url}') time.sleep(1) diff --git a/javdb/src/scraper.py b/javdb/src/scraper.py index bf1ebd3..8fe772d 100644 --- a/javdb/src/scraper.py +++ b/javdb/src/scraper.py @@ -48,7 +48,7 @@ def fetch_page(url, validator, max_retries=3, parser="html.parser", preprocessor # 判断是否为登录页面, if soup.find('nav', class_='panel form-panel'): logging.warning(f"Page redirected to login page on {url}.") - return None, 404 + return None, 401 # 预处理 HTML(如果提供了 preprocessor) html_text = preprocessor(response.text) if preprocessor else response.text diff --git a/javdb/src/sqlite_utils.py b/javdb/src/sqlite_utils.py index af6deb7..bff2457 100644 --- a/javdb/src/sqlite_utils.py +++ b/javdb/src/sqlite_utils.py @@ -429,17 +429,17 @@ def insert_or_update_movie(movie): return None # """插入或更新电影数据(异常url的处理,比如404链接)""" -def insert_or_update_movie_404(title, href): +def insert_or_update_movie_404(title, href, is_full_data=1): try: # 插入或更新电影信息 cursor.execute( """ INSERT INTO javdb_movies (title, href, is_full_data, updated_at) - VALUES (?, ?, 1, datetime('now', 'localtime')) + VALUES (?, ?, ?, datetime('now', 'localtime')) ON CONFLICT(href) DO UPDATE SET - title=excluded.title, is_full_data=1, updated_at = datetime('now', 'localtime') + title=excluded.title, is_full_data=excluded.is_full_data, updated_at = datetime('now', 'localtime') """, - (title, href) + (title, href, is_full_data) ) conn.commit()