Modify scripts: report login-redirect pages as 401 instead of 404 and record the status via is_full_data

This commit is contained in:
oscarz
2025-03-30 14:35:58 +08:00
parent 8a249eb3ae
commit c6ebe185be
3 changed files with 9 additions and 6 deletions

View File

@@ -220,8 +220,11 @@ def fetch_movies_detail():
logging.warning(f'parse_page_movie error. url: {url}')
elif status_code and status_code == 404:
movie_id = db_tools.insert_or_update_movie_404(title=title, href=url)
movie_id = db_tools.insert_or_update_movie_404(title=title, href=url, is_full_data=2)
logging.warning(f'404 page. id: {movie_id}, title: ({title}), url: {url}, Skiping...')
elif status_code and status_code == 401:
movie_id = db_tools.insert_or_update_movie_404(title=title, href=url, is_full_data=3)
logging.warning(f'401 page(need login). id: {movie_id}, title: ({title}), url: {url}, Skiping...')
else:
logging.warning(f'fetch_page error. url: {url}')
time.sleep(1)

View File

@@ -48,7 +48,7 @@ def fetch_page(url, validator, max_retries=3, parser="html.parser", preprocessor
# 判断是否为登录页面,
if soup.find('nav', class_='panel form-panel'):
logging.warning(f"Page redirected to login page on {url}.")
return None, 404
return None, 401
# 预处理 HTML如果提供了 preprocessor
html_text = preprocessor(response.text) if preprocessor else response.text

View File

@@ -429,17 +429,17 @@ def insert_or_update_movie(movie):
return None
# """插入或更新电影数据(异常url的处理比如404链接)"""
def insert_or_update_movie_404(title, href):
def insert_or_update_movie_404(title, href, is_full_data=1):
try:
# 插入或更新电影信息
cursor.execute(
"""
INSERT INTO javdb_movies (title, href, is_full_data, updated_at)
VALUES (?, ?, 1, datetime('now', 'localtime'))
VALUES (?, ?, ?, datetime('now', 'localtime'))
ON CONFLICT(href) DO UPDATE SET
title=excluded.title, is_full_data=1, updated_at = datetime('now', 'localtime')
title=excluded.title, is_full_data=excluded.is_full_data, updated_at = datetime('now', 'localtime')
""",
(title, href)
(title, href, is_full_data)
)
conn.commit()