modify scripts

This commit is contained in:
oscarz
2025-06-26 09:29:03 +08:00
parent 1387841aab
commit f0fdcf60a4

View File

@ -96,7 +96,7 @@ def fetch_movies_common(tbl):
row_id = row['id']
uncensored = row['uncensored'] if row['uncensored'] > 0 else None
if not utils.is_valid_url(url):
logging.info(f'invalid url {url} in {tbl}, skipping...') logging.info(f'invalid url ({url}) in {tbl}, row id: {row_id}. skipping...')
continue
# 去掉可下载的标志(如果有)
next_url = url
@ -163,8 +163,9 @@ def update_multilang_common(tbl):
for row in url_list:
url = row['href']
row_id = row['id']
if not utils.is_valid_url(url):
logging.info(f'invalid url {url} in {tbl}, skipping...') logging.info(f'invalid url ({url}) in {tbl}, row id: {row_id}. skipping...')
continue
langs_url = utils.generate_multilang_urls(url)
for lang, next_url in langs_url.items():
@ -216,8 +217,9 @@ def update_multilang_tags():
for row in url_list:
url = row['href']
row_id = row['id']
if not utils.is_valid_url(url):
logging.info(f'invalid url {url}, skipping...') logging.info(f'invalid url ({url}), row id: {row_id}. skipping...')
continue
langs_url = utils.generate_multilang_urls(url)
for lang, next_url in langs_url.items():
@ -292,6 +294,9 @@ def fetch_performers_detail():
person = performer['name']
uncensored = int(performer['uncensored'])
avatar = None
if not utils.is_valid_url(url):
logging.info(f'invalid url ({url}), name: {person}. skipping...')
continue
next_url = url
all_movies = []
@ -388,6 +393,10 @@ def fetch_movies_detail():
title = movie['title']
curr_id = movie['id']
uncensored = int(movie['uncensored'])
if not utils.is_valid_url(url):
logging.info(f'invalid url ({url}), row id: {curr_id}. skipping...')
continue
logging.debug(f"Fetching data for movie ({title}), url {url} ...")
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="container", attr_type="class"))
# 从本地读取的文件,忽略