modify scripts

This commit is contained in:
oscarz
2025-07-06 19:12:51 +08:00
parent b04c43f0ab
commit 1ab4d55483
6 changed files with 24 additions and 9 deletions

0
scrapy_proj/pbox/1.json Normal file
View File

0
scrapy_proj/pbox/2.json Normal file
View File

0
scrapy_proj/pbox/3.json Normal file
View File

0
scrapy_proj/pbox/4.json Normal file
View File

0
scrapy_proj/pbox/5.json Normal file
View File

View File

@ -44,14 +44,26 @@ class PornboxSpider(BaseSpider):
self.update = int(update) self.update = int(update)
self.logger.info(f"debug mod: {self.debug}, cmd: {self.cmd_str}, update: {self.update}") self.logger.info(f"debug mod: {self.debug}, cmd: {self.cmd_str}, update: {self.update}")
# Names of the crawl stages that can be selected via the command string.
self.cmd_studio = 'studio'
self.cmd_movie = 'movies'
self.cmd_actors = 'actors'
# Parse the comma-separated command string, trimming whitespace and dropping
# blank entries. A plain ''.split(',') returns [''] (never []), so checking
# the raw split result for emptiness could not detect "no command given".
self.cmd_list = [
    cmd.strip() for cmd in (self.cmd_str or '').split(',') if cmd.strip()
]
# No stage selected explicitly: fall back to running every stage.
if not self.cmd_list:
    self.cmd_list = [self.cmd_studio, self.cmd_movie, self.cmd_actors]
# 入口函数,由基类的方法触发 # 入口函数,由基类的方法触发
def custom_start_requests(self): def custom_start_requests(self):
# studios 列表 # studios 列表
if self.cmd_studio in self.cmd_list:
url = "https://pornbox.com/studio/list/ppd?page=1&sort=popular" url = "https://pornbox.com/studio/list/ppd?page=1&sort=popular"
#yield scrapy.Request(url, callback=self.parse_studios_list) yield scrapy.Request(url, callback=self.parse_studios_list)
# 获取每个stutio, 获取详情 # 获取每个stutio, 获取详情
stu_list = db_tools.get_studios(limit=1 if self.debug else 100) if self.cmd_movie in self.cmd_list:
# In debug mode cap the studio query at 5 rows; otherwise pass no filters.
filters = {'limit': 5} if self.debug else {}
stu_list = db_tools.get_studios(**filters)
for stu in stu_list: for stu in stu_list:
stu_url = f"https://pornbox.com/studio/{stu['label_id']}" stu_url = f"https://pornbox.com/studio/{stu['label_id']}"
url = f"{stu_url}/?skip=1&sort=recent&_={int(datetime.now().timestamp()*1000)}" url = f"{stu_url}/?skip=1&sort=recent&_={int(datetime.now().timestamp()*1000)}"
@ -106,6 +118,9 @@ class PornboxSpider(BaseSpider):
# 处理分页 # 处理分页
if current_page < total_pages: if current_page < total_pages:
next_page = current_page + 1 next_page = current_page + 1
if self.debug and current_page >= 5:
pass
else:
next_url = f"https://pornbox.com/studio/list/ppd?page={next_page}&sort=popular" next_url = f"https://pornbox.com/studio/list/ppd?page={next_page}&sort=popular"
yield scrapy.Request(next_url, callback=self.parse_studios_list) yield scrapy.Request(next_url, callback=self.parse_studios_list)