modify scripts

This commit is contained in:
oscarz
2025-06-26 09:11:41 +08:00
parent ad9c9094c9
commit 54c073532c

View File

@ -61,7 +61,7 @@ def fetch_actor_list_lang(lang="en", uncensored=None):
if debug:
return True
# 获取演员列表
# 获取演员列表,控制逻辑,多语言
def fetch_actor_list():
if g_uncensored == 1:
for lang in ["en", "ja", "zh"]:
@ -95,10 +95,13 @@ def fetch_movies_common(tbl):
url = row['href']
row_id = row['id']
uncensored = row['uncensored'] if row['uncensored'] > 0 else None
if not utils.is_valid_url(url):
logging.info(f'invalid url {url} in {tbl}, skipping...')
continue
# 去掉可下载的标志(如果有)
next_url = url
while next_url:
logging.info(f"Fetching data for maker url {next_url} ...")
logging.info(f"Fetching data from {tbl} url {next_url} ...")
soup, status_code = scraper.fetch_page(next_url, partial(scraper.generic_validator, tag="div", identifier="waterfall", attr_type="id"))
if soup:
list_data, next_url = scraper.parse_studios_labels_series_detail(soup, next_url)
@ -125,6 +128,10 @@ def fetch_movies_common(tbl):
elif status_code and status_code == 404:
logging.warning(f"fetch page error. httpcode: {status_code}, url: {next_url}")
break
else: # 达到失败上限,加上休眠继续重试
time.sleep(5)
time.sleep(0.3)
# 调试增加break
if debug:
@ -142,7 +149,7 @@ def fetch_movies_by_label():
def fetch_movies_by_series():
    """Fetch movies for the 'series' table by delegating to fetch_movies_common."""
    table_name = 'series'
    fetch_movies_common(table_name)
# 从studio/label/series中获取影片
# 从studio/label/series首页获取他们的多语言表述
def update_multilang_common(tbl):
if debug:
url_list = db_tools.query_list_common(tbl=tbl, limit=3)
@ -162,7 +169,7 @@ def update_multilang_common(tbl):
langs_url = utils.generate_multilang_urls(url)
for lang, next_url in langs_url.items():
while next_url:
logging.info(f"Fetching data for url {next_url} ..., raw url: {url}")
logging.info(f"Fetching data for url {next_url} ...")
soup, status_code = scraper.fetch_page(next_url, partial(scraper.generic_validator, tag="div", identifier="waterfall", attr_type="id"))
if soup:
list_data, next_url = scraper.parse_studios_labels_series_detail(soup, next_url)
@ -186,13 +193,21 @@ def update_multilang_common(tbl):
logging.warning(f'fetch page error. httpcode: {status_code}, url: {next_url}')
break
else: # 达到失败上限,加上休眠继续重试
time.sleep(5)
time.sleep(0.3)
if debug:
break
# 更新series列表中的影片信息
def update_multi_langs():
    """Run update_multilang_common for the studio, label and series tables, in that order."""
    for table_name in ('studio', 'label', 'series'):
        update_multilang_common(table_name)
# 从studio/label/series中获取影片
# 获取影片tags的多语言表述
def update_multilang_tags():
if debug:
url_list = db_tools.query_tags(limit=5)
@ -231,6 +246,13 @@ def update_multilang_tags():
logging.warning(f'fetch page error. httpcode: {status_code}, url: {next_url}')
break
else: # 达到失败上限,加上休眠继续重试
time.sleep(5)
time.sleep(0.3)
if debug:
break
# 更新演员信息
def fetch_performers_detail():
limit_count = 5 if debug else 100
@ -296,6 +318,7 @@ def fetch_performers_detail():
break
else:
logging.warning(f'fetch_page error. url: {url}')
time.sleep(2)
# 如果出现了401或者404已经处理直接跳过
if not need_insert:
@ -388,6 +411,7 @@ def fetch_movies_detail():
logging.warning(f'insert movie {url} failed.')
else:
logging.warning(f'parse_page_movie error. url: {url}')
time.sleep(2)
elif status_code and status_code == craw.http_code_404:
movie_id = db_tools.insert_or_update_movie_404({'href': url, 'is_full_data': craw.http_code_404})