From 7e1b905afed95c7672e1dd32a41e5641252471cb Mon Sep 17 00:00:00 2001 From: sophon Date: Sat, 26 Jul 2025 19:13:40 +0800 Subject: [PATCH] modify scripts --- scrapy_proj/scrapy_proj/spiders/javbus_spider.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scrapy_proj/scrapy_proj/spiders/javbus_spider.py b/scrapy_proj/scrapy_proj/spiders/javbus_spider.py index 46a893f..af0c4c6 100644 --- a/scrapy_proj/scrapy_proj/spiders/javbus_spider.py +++ b/scrapy_proj/scrapy_proj/spiders/javbus_spider.py @@ -169,7 +169,7 @@ class JavbusSpiderSpider(BaseSpider): movies_cnt = titles.get('movies_cnt', 0) if not self.need_update_actor(href=actor_url, movies_cnt=movies_cnt): self.crawler.stats.inc_value(f"{self.name}/actor_done") - self.logger.info(f"actor ({actor_name}) up to date. skipping... url: {actor_url}") + self.logger.info(f"actor ({actor_name}) up to date. movies cnt: {movies_cnt} skipping... url: {actor_url}") return None # 需要更新了,先翻页 @@ -275,7 +275,9 @@ class JavbusSpiderSpider(BaseSpider): if href == '': return if is_valid_url(href): + # 只有en和ja,需要手动补齐一下zh langs_url = generate_multilang_urls(href) + langs_url['zh'] = href for lang, next_url in langs_url.items(): if not self._can_request(next_url): continue @@ -328,6 +330,7 @@ class JavbusSpiderSpider(BaseSpider): name_key = f"{lang}_name" if lang !='zh' else 'name' if name_key in item.fields: item[name_key] = title_meta.get('title') + self.logger.debug(f"movies list ({prefix}) title: {title_meta}") if not response.meta.get('from_cache'): yield item