modify scripts
This commit is contained in:
@ -220,8 +220,11 @@ def fetch_movies_detail():
|
|||||||
logging.warning(f'parse_page_movie error. url: {url}')
|
logging.warning(f'parse_page_movie error. url: {url}')
|
||||||
|
|
||||||
elif status_code and status_code == 404:
|
elif status_code and status_code == 404:
|
||||||
movie_id = db_tools.insert_or_update_movie_404(title=title, href=url)
|
movie_id = db_tools.insert_or_update_movie_404(title=title, href=url, is_full_data=2)
|
||||||
logging.warning(f'404 page. id: {movie_id}, title: ({title}), url: {url}, Skiping...')
|
logging.warning(f'404 page. id: {movie_id}, title: ({title}), url: {url}, Skiping...')
|
||||||
|
elif status_code and status_code == 401:
|
||||||
|
movie_id = db_tools.insert_or_update_movie_404(title=title, href=url, is_full_data=3)
|
||||||
|
logging.warning(f'401 page(need login). id: {movie_id}, title: ({title}), url: {url}, Skiping...')
|
||||||
else:
|
else:
|
||||||
logging.warning(f'fetch_page error. url: {url}')
|
logging.warning(f'fetch_page error. url: {url}')
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|||||||
@ -48,7 +48,7 @@ def fetch_page(url, validator, max_retries=3, parser="html.parser", preprocessor
|
|||||||
# 判断是否为登录页面,
|
# 判断是否为登录页面,
|
||||||
if soup.find('nav', class_='panel form-panel'):
|
if soup.find('nav', class_='panel form-panel'):
|
||||||
logging.warning(f"Page redirected to login page on {url}.")
|
logging.warning(f"Page redirected to login page on {url}.")
|
||||||
return None, 404
|
return None, 401
|
||||||
|
|
||||||
# 预处理 HTML(如果提供了 preprocessor)
|
# 预处理 HTML(如果提供了 preprocessor)
|
||||||
html_text = preprocessor(response.text) if preprocessor else response.text
|
html_text = preprocessor(response.text) if preprocessor else response.text
|
||||||
|
|||||||
@ -429,17 +429,17 @@ def insert_or_update_movie(movie):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
# """插入或更新电影数据(异常url的处理,比如404链接)"""
|
# """插入或更新电影数据(异常url的处理,比如404链接)"""
|
||||||
def insert_or_update_movie_404(title, href):
|
def insert_or_update_movie_404(title, href, is_full_data=1):
|
||||||
try:
|
try:
|
||||||
# 插入或更新电影信息
|
# 插入或更新电影信息
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""
|
"""
|
||||||
INSERT INTO javdb_movies (title, href, is_full_data, updated_at)
|
INSERT INTO javdb_movies (title, href, is_full_data, updated_at)
|
||||||
VALUES (?, ?, 1, datetime('now', 'localtime'))
|
VALUES (?, ?, ?, datetime('now', 'localtime'))
|
||||||
ON CONFLICT(href) DO UPDATE SET
|
ON CONFLICT(href) DO UPDATE SET
|
||||||
title=excluded.title, is_full_data=1, updated_at = datetime('now', 'localtime')
|
title=excluded.title, is_full_data=excluded.is_full_data, updated_at = datetime('now', 'localtime')
|
||||||
""",
|
""",
|
||||||
(title, href)
|
(title, href, is_full_data)
|
||||||
)
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user