modify scripts
This commit is contained in:
@ -220,8 +220,11 @@ def fetch_movies_detail():
|
||||
logging.warning(f'parse_page_movie error. url: {url}')
|
||||
|
||||
elif status_code and status_code == 404:
|
||||
movie_id = db_tools.insert_or_update_movie_404(title=title, href=url)
|
||||
movie_id = db_tools.insert_or_update_movie_404(title=title, href=url, is_full_data=2)
|
||||
logging.warning(f'404 page. id: {movie_id}, title: ({title}), url: {url}, Skiping...')
|
||||
elif status_code and status_code == 401:
|
||||
movie_id = db_tools.insert_or_update_movie_404(title=title, href=url, is_full_data=3)
|
||||
logging.warning(f'401 page(need login). id: {movie_id}, title: ({title}), url: {url}, Skiping...')
|
||||
else:
|
||||
logging.warning(f'fetch_page error. url: {url}')
|
||||
time.sleep(1)
|
||||
|
||||
@ -48,7 +48,7 @@ def fetch_page(url, validator, max_retries=3, parser="html.parser", preprocessor
|
||||
# 判断是否为登录页面,
|
||||
if soup.find('nav', class_='panel form-panel'):
|
||||
logging.warning(f"Page redirected to login page on {url}.")
|
||||
return None, 404
|
||||
return None, 401
|
||||
|
||||
# 预处理 HTML(如果提供了 preprocessor)
|
||||
html_text = preprocessor(response.text) if preprocessor else response.text
|
||||
|
||||
@ -429,17 +429,17 @@ def insert_or_update_movie(movie):
|
||||
return None
|
||||
|
||||
# """插入或更新电影数据(异常url的处理,比如404链接)"""
|
||||
def insert_or_update_movie_404(title, href):
|
||||
def insert_or_update_movie_404(title, href, is_full_data=1):
|
||||
try:
|
||||
# 插入或更新电影信息
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO javdb_movies (title, href, is_full_data, updated_at)
|
||||
VALUES (?, ?, 1, datetime('now', 'localtime'))
|
||||
VALUES (?, ?, ?, datetime('now', 'localtime'))
|
||||
ON CONFLICT(href) DO UPDATE SET
|
||||
title=excluded.title, is_full_data=1, updated_at = datetime('now', 'localtime')
|
||||
title=excluded.title, is_full_data=excluded.is_full_data, updated_at = datetime('now', 'localtime')
|
||||
""",
|
||||
(title, href)
|
||||
(title, href, is_full_data)
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user