modify scripts

This commit is contained in:
oscarz
2025-07-06 19:12:51 +08:00
parent b04c43f0ab
commit 1ab4d55483
6 changed files with 24 additions and 9 deletions

0
scrapy_proj/pbox/1.json Normal file
View File

0
scrapy_proj/pbox/2.json Normal file
View File

0
scrapy_proj/pbox/3.json Normal file
View File

0
scrapy_proj/pbox/4.json Normal file
View File

0
scrapy_proj/pbox/5.json Normal file
View File

View File

@ -44,18 +44,30 @@ class PornboxSpider(BaseSpider):
self.update = int(update)
self.logger.info(f"debug mod: {self.debug}, cmd: {self.cmd_str}, update: {self.update}")
self.cmd_studio = 'studio'
self.cmd_movie = 'movies'
self.cmd_actors = 'actors'
self.cmd_list = self.cmd_str.split(',')
if len(self.cmd_list) == 0 :
self.cmd_list = [self.cmd_studio, self.cmd_movie, self.cmd_actors]
# 入口函数,由基类的方法触发
def custom_start_requests(self):
# Yield the initial scrapy.Request objects for this spider: the studio
# listing page (handled by parse_studios_list) and one request per studio
# returned by db_tools.get_studios (handled by parse_studio).
#
# NOTE(review): this text comes from a rendered diff hunk with no +/-
# markers and no indentation, so statements from before and after the
# commit appear interleaved. Presumably the unconditional statements are
# the removed version and the `if self.cmd_... in self.cmd_list` branches
# are the added version -- confirm against the raw diff.
# Studio listing page.
url = "https://pornbox.com/studio/list/ppd?page=1&sort=popular"
#yield scrapy.Request(url, callback=self.parse_studios_list)
# Request the listing only when the 'studio' command is in cmd_list.
if self.cmd_studio in self.cmd_list:
url = "https://pornbox.com/studio/list/ppd?page=1&sort=popular"
yield scrapy.Request(url, callback=self.parse_studios_list)
# Fetch each studio and request its page.
# In debug mode only one studio is requested here, otherwise up to 100.
stu_list = db_tools.get_studios(limit=1 if self.debug else 100)
for stu in stu_list:
stu_url = f"https://pornbox.com/studio/{stu['label_id']}"
# `_` carries the current time in milliseconds; presumably a cache-buster -- TODO confirm.
url = f"{stu_url}/?skip=1&sort=recent&_={int(datetime.now().timestamp()*1000)}"
# The studio URL is passed to the callback via meta under 'sdu_href'.
yield scrapy.Request(url, callback=self.parse_studio, meta={'sdu_href':stu_url})
# Request studio pages only when the 'movies' command is in cmd_list.
if self.cmd_movie in self.cmd_list:
# NOTE(review): `fitlers` looks like a misspelling of `filters`; it is a
# local name, so renaming it would be safe.
fitlers= {}
# Debug mode caps the query at 5 studios; otherwise no limit is passed.
if self.debug :
fitlers['limit'] = 5
stu_list = db_tools.get_studios(**fitlers)
for stu in stu_list:
stu_url = f"https://pornbox.com/studio/{stu['label_id']}"
# `_` carries the current time in milliseconds; presumably a cache-buster -- TODO confirm.
url = f"{stu_url}/?skip=1&sort=recent&_={int(datetime.now().timestamp()*1000)}"
# NOTE(review): the meta key 'sdu_href' may be a typo for 'stu_href', but it
# is a runtime key -- check how parse_studio reads it before changing it.
yield scrapy.Request(url, callback=self.parse_studio, meta={'sdu_href':stu_url})
def parse_studios_list(self, response):
@ -106,8 +118,11 @@ class PornboxSpider(BaseSpider):
# 处理分页
if current_page < total_pages:
next_page = current_page + 1
next_url = f"https://pornbox.com/studio/list/ppd?page={next_page}&sort=popular"
yield scrapy.Request(next_url, callback=self.parse_studios_list)
if self.debug and current_page >= 5:
pass
else:
next_url = f"https://pornbox.com/studio/list/ppd?page={next_page}&sort=popular"
yield scrapy.Request(next_url, callback=self.parse_studios_list)
def parse_studio(self, response):