modify scripts
This commit is contained in:
@@ -169,7 +169,7 @@ class JavbusSpiderSpider(BaseSpider):
|
|||||||
movies_cnt = titles.get('movies_cnt', 0)
|
movies_cnt = titles.get('movies_cnt', 0)
|
||||||
if not self.need_update_actor(href=actor_url, movies_cnt=movies_cnt):
|
if not self.need_update_actor(href=actor_url, movies_cnt=movies_cnt):
|
||||||
self.crawler.stats.inc_value(f"{self.name}/actor_done")
|
self.crawler.stats.inc_value(f"{self.name}/actor_done")
|
||||||
self.logger.info(f"actor ({actor_name}) up to date. skipping... url: {actor_url}")
|
self.logger.info(f"actor ({actor_name}) up to date. movies cnt: {movies_cnt} skipping... url: {actor_url}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# 需要更新了,先翻页
|
# 需要更新了,先翻页
|
||||||
@@ -275,7 +275,9 @@ class JavbusSpiderSpider(BaseSpider):
|
|||||||
if href == '':
|
if href == '':
|
||||||
return
|
return
|
||||||
if is_valid_url(href):
|
if is_valid_url(href):
|
||||||
|
# 只有en和ja,需要手动补齐一下zh
|
||||||
langs_url = generate_multilang_urls(href)
|
langs_url = generate_multilang_urls(href)
|
||||||
|
langs_url['zh'] = href
|
||||||
for lang, next_url in langs_url.items():
|
for lang, next_url in langs_url.items():
|
||||||
if not self._can_request(next_url):
|
if not self._can_request(next_url):
|
||||||
continue
|
continue
|
||||||
@@ -328,6 +330,7 @@ class JavbusSpiderSpider(BaseSpider):
|
|||||||
name_key = f"{lang}_name" if lang !='zh' else 'name'
|
name_key = f"{lang}_name" if lang !='zh' else 'name'
|
||||||
if name_key in item.fields:
|
if name_key in item.fields:
|
||||||
item[name_key] = title_meta.get('title')
|
item[name_key] = title_meta.get('title')
|
||||||
|
self.logger.debug(f"movies list ({prefix}) title: {title_meta}")
|
||||||
|
|
||||||
if not response.meta.get('from_cache'):
|
if not response.meta.get('from_cache'):
|
||||||
yield item
|
yield item
|
||||||
|
|||||||
Reference in New Issue
Block a user