modify scripts

This commit is contained in:
oscarz
2025-04-25 16:23:01 +08:00
parent 4c80e72a98
commit 40b5d3f99a
3 changed files with 198 additions and 11 deletions

View File

@ -161,6 +161,42 @@ def fetch_movies_by_series():
if debug:
return True
# Update the movie info for every publisher in the publishers list
def fetch_movies_by_publishers():
    """Crawl each publisher's listing pages and index the movies found there.

    Publisher rows are read with ``db_tools.query_publishers_hrefs``: called
    with ``from_list=1`` when ``fast_mode`` is set, with no arguments
    otherwise, and replaced by a ``limit=1`` query when ``debug`` is set.
    For every row the listing is followed page by page for as long as
    ``scraper.parse_publisher_detail`` returns a next URL, and every movie
    on a page is stored through ``db_tools.insert_movie_index`` with the
    publisher's row id as ``pub_id``.

    Returns:
        ``True`` after the first publisher has been processed when ``debug``
        is set; otherwise ``None`` once every publisher has been processed.
    """
    if fast_mode:
        url_list = db_tools.query_publishers_hrefs(from_list=1)
    else:
        url_list = db_tools.query_publishers_hrefs()
    if debug:
        # Debug runs replace whatever was selected above with a single row.
        url_list = db_tools.query_publishers_hrefs(limit=1)

    # The page validator does not depend on the URL, so build it only once.
    page_validator = partial(scraper.generic_validator, tag="div", identifier="modal-card", attr_type="class")

    for row in url_list:
        url = row['href']
        row_id = row['id']
        # Drop the "downloadable" flag from the URL (if there is one).
        next_url = utils.remove_url_query(url)
        while next_url:
            # Remember which page is being fetched: next_url is overwritten
            # by the parser below, and the log messages must name this page.
            page_url = next_url
            logging.info(f"Fetching data for publisher url {page_url} ...")
            soup, status_code = scraper.fetch_page(page_url, page_validator)
            if soup:
                list_data, next_url = scraper.parse_publisher_detail(soup, page_url)
                if list_data:
                    for movie in list_data:
                        tmp_id = db_tools.insert_movie_index(title=movie['title'], href=movie['href'], from_movie_publishers=1, pub_id=row_id)
                        # Double quotes inside the single-quoted f-strings keep
                        # them valid on interpreters older than Python 3.12.
                        if tmp_id:
                            logging.debug(f'insert one movie index to db. movie_id: {tmp_id}, title: {movie["title"]}, href: {movie["href"]}')
                        else:
                            logging.warning(f'insert movie index failed. title: {movie["title"]}, href: {movie["href"]}')
                else:
                    logging.warning(f'parse_page_movie error. url: {page_url}')
            elif status_code == 404:
                logging.warning(f'fetch page error. httpcode: {status_code}, url: {page_url}')
                break
            # NOTE(review): any other fetch failure leaves next_url unchanged,
            # so the same page is requested again on the next iteration --
            # confirm that retrying without a limit is intended.
        # In debug mode stop after the first publisher.
        if debug:
            return True
# 更新演员信息
def fetch_performers_detail():
@ -316,6 +352,7 @@ function_map = {
"series_list": fetch_series_list,
"makers": fetch_movies_by_maker,
"series" : fetch_movies_by_series,
"pub" : fetch_movies_by_publishers,
"movies" : fetch_movies_detail,
"actors" : fetch_performers_detail,
}