From f0fdcf60a44429b95c3d6e262d6cdff37084c746 Mon Sep 17 00:00:00 2001
From: oscarz
Date: Thu, 26 Jun 2025 09:29:03 +0800
Subject: [PATCH] fetch: validate URLs and log row id when skipping

---
 src/javbus/fetch.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/javbus/fetch.py b/src/javbus/fetch.py
index 1a93c53..07b9841 100644
--- a/src/javbus/fetch.py
+++ b/src/javbus/fetch.py
@@ -96,7 +96,7 @@ def fetch_movies_common(tbl):
         row_id = row['id']
         uncensored = row['uncensored'] if row['uncensored'] > 0 else None
         if not utils.is_valid_url(url):
-            logging.info(f'invalid url {url} in {tbl}, skipping...')
+            logging.info(f'invalid url ({url}) in {tbl}, row id: {row_id}. skipping...')
             continue
         # strip the downloadable marker (if present)
         next_url = url
@@ -163,8 +163,9 @@ def update_multilang_common(tbl):
 
     for row in url_list:
         url = row['href']
+        row_id = row['id']
         if not utils.is_valid_url(url):
-            logging.info(f'invalid url {url} in {tbl}, skipping...')
+            logging.info(f'invalid url ({url}) in {tbl}, row id: {row_id}. skipping...')
             continue
         langs_url = utils.generate_multilang_urls(url)
         for lang, next_url in langs_url.items():
@@ -216,8 +217,9 @@ def update_multilang_tags():
 
     for row in url_list:
         url = row['href']
+        row_id = row['id']
         if not utils.is_valid_url(url):
-            logging.info(f'invalid url {url}, skipping...')
+            logging.info(f'invalid url ({url}), row id: {row_id}. skipping...')
             continue
         langs_url = utils.generate_multilang_urls(url)
         for lang, next_url in langs_url.items():
@@ -292,6 +294,9 @@ def fetch_performers_detail():
         person = performer['name']
         uncensored = int(performer['uncensored'])
         avatar = None
+        if not utils.is_valid_url(url):
+            logging.info(f'invalid url ({url}), name: {person}. skipping...')
+            continue
         next_url = url
 
         all_movies = []
@@ -388,6 +393,10 @@ def fetch_movies_detail():
         title = movie['title']
         curr_id = movie['id']
         uncensored = int(movie['uncensored'])
+        if not utils.is_valid_url(url):
+            logging.info(f'invalid url ({url}), row id: {curr_id}. skipping...')
+            continue
+
         logging.debug(f"Fetching data for movie ({title}), url {url} ...")
         soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="container", attr_type="class"))
         # files read locally are ignored
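
Note: every hunk in this patch gates its fetch loop on utils.is_valid_url(), but the helper itself is not part of the diff. A minimal sketch of what such a validator could look like, assuming it only needs to reject empty values and non-absolute URLs (the project's real check may do more):

    # Hedged sketch; utils.is_valid_url is not shown in this patch, so the
    # scheme/netloc rule below is an assumption, not the project's real logic.
    from urllib.parse import urlparse

    def is_valid_url(url):
        """Return True if url looks like an absolute http(s) URL."""
        if not url:
            return False
        parsed = urlparse(url)
        return parsed.scheme in ('http', 'https') and bool(parsed.netloc)

With a check of this shape, rows whose href is empty, relative, or uses an unexpected scheme get logged with their row id (or performer name) and skipped, instead of being passed on to scraper.fetch_page().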
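The multilang hunks iterate over utils.generate_multilang_urls(url).items(), so that helper evidently returns a mapping of language code to URL. A sketch under that assumption only; the language list and the path-prefix scheme below are illustrative, not the project's actual implementation:

    # Hypothetical reconstruction: the diff only implies a dict of
    # lang -> url; LANGS and the '/<lang>' path prefix are invented here.
    from urllib.parse import urlparse, urlunparse

    LANGS = ('en', 'ja', 'ko')  # hypothetical language codes

    def generate_multilang_urls(url):
        """Build one URL per language by prefixing the path with a language code."""
        parsed = urlparse(url)
        return {
            lang: urlunparse(parsed._replace(path=f'/{lang}{parsed.path}'))
            for lang in LANGS
        }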