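Modify scripts: report login-redirect pages as 401 instead of 404, and record 404 pages with is_full_data=2 and 401 (login required) pages with is_full_data=3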

This commit is contained in:
oscarz
2025-03-30 14:35:58 +08:00
parent 8a249eb3ae
commit c6ebe185be
3 changed files with 9 additions and 6 deletions

View File

@ -220,8 +220,11 @@ def fetch_movies_detail():
logging.warning(f'parse_page_movie error. url: {url}') logging.warning(f'parse_page_movie error. url: {url}')
elif status_code and status_code == 404: elif status_code and status_code == 404:
movie_id = db_tools.insert_or_update_movie_404(title=title, href=url) movie_id = db_tools.insert_or_update_movie_404(title=title, href=url, is_full_data=2)
logging.warning(f'404 page. id: {movie_id}, title: ({title}), url: {url}, Skiping...') logging.warning(f'404 page. id: {movie_id}, title: ({title}), url: {url}, Skiping...')
elif status_code and status_code == 401:
movie_id = db_tools.insert_or_update_movie_404(title=title, href=url, is_full_data=3)
logging.warning(f'401 page(need login). id: {movie_id}, title: ({title}), url: {url}, Skiping...')
else: else:
logging.warning(f'fetch_page error. url: {url}') logging.warning(f'fetch_page error. url: {url}')
time.sleep(1) time.sleep(1)

View File

@ -48,7 +48,7 @@ def fetch_page(url, validator, max_retries=3, parser="html.parser", preprocessor
# 判断是否为登录页面, # 判断是否为登录页面,
if soup.find('nav', class_='panel form-panel'): if soup.find('nav', class_='panel form-panel'):
logging.warning(f"Page redirected to login page on {url}.") logging.warning(f"Page redirected to login page on {url}.")
return None, 404 return None, 401
# 预处理 HTML如果提供了 preprocessor # 预处理 HTML如果提供了 preprocessor
html_text = preprocessor(response.text) if preprocessor else response.text html_text = preprocessor(response.text) if preprocessor else response.text

View File

@ -429,17 +429,17 @@ def insert_or_update_movie(movie):
return None return None
# """插入或更新电影数据(异常url的处理比如404链接)""" # """插入或更新电影数据(异常url的处理比如404链接)"""
def insert_or_update_movie_404(title, href): def insert_or_update_movie_404(title, href, is_full_data=1):
try: try:
# 插入或更新电影信息 # 插入或更新电影信息
cursor.execute( cursor.execute(
""" """
INSERT INTO javdb_movies (title, href, is_full_data, updated_at) INSERT INTO javdb_movies (title, href, is_full_data, updated_at)
VALUES (?, ?, 1, datetime('now', 'localtime')) VALUES (?, ?, ?, datetime('now', 'localtime'))
ON CONFLICT(href) DO UPDATE SET ON CONFLICT(href) DO UPDATE SET
title=excluded.title, is_full_data=1, updated_at = datetime('now', 'localtime') title=excluded.title, is_full_data=excluded.is_full_data, updated_at = datetime('now', 'localtime')
""", """,
(title, href) (title, href, is_full_data)
) )
conn.commit() conn.commit()