modify scripts

2025-07-06 19:12:51 +08:00
parent b04c43f0ab
commit 1ab4d55483
6 changed files with 24 additions and 9 deletions
--- a/scrapy_proj/pbox/1.json
+++ b/scrapy_proj/pbox/1.json
--- a/scrapy_proj/pbox/2.json
+++ b/scrapy_proj/pbox/2.json
--- a/scrapy_proj/pbox/3.json
+++ b/scrapy_proj/pbox/3.json
--- a/scrapy_proj/pbox/4.json
+++ b/scrapy_proj/pbox/4.json
--- a/scrapy_proj/pbox/5.json
+++ b/scrapy_proj/pbox/5.json
--- a/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py
+++ b/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py
@@ -44,14 +44,26 @@ class PornboxSpider(BaseSpider):
        self.update = int(update)
        self.logger.info(f"debug mod: {self.debug}, cmd: {self.cmd_str}, update: {self.update}")
        self.cmd_studio = 'studio'
        self.cmd_movie = 'movies'
        self.cmd_actors = 'actors'
        self.cmd_list = self.cmd_str.split(',')
        if len(self.cmd_list) == 0 :
            self.cmd_list = [self.cmd_studio, self.cmd_movie, self.cmd_actors]
    # 入口函数，由基类的方法触发
    def custom_start_requests(self):
        # studios 列表
        if self.cmd_studio in self.cmd_list:
            url = "https://pornbox.com/studio/list/ppd?page=1&sort=popular"
-        #yield scrapy.Request(url, callback=self.parse_studios_list)
+            yield scrapy.Request(url, callback=self.parse_studios_list)
        # 获取每个studio, 获取详情
-        stu_list = db_tools.get_studios(limit=1 if self.debug else 100)
+        if self.cmd_movie in self.cmd_list:
            fitlers= {}
            if self.debug :
                fitlers['limit'] = 5
            stu_list = db_tools.get_studios(**fitlers)
            for stu in stu_list:
                stu_url = f"https://pornbox.com/studio/{stu['label_id']}"
                url = f"{stu_url}/?skip=1&sort=recent&_={int(datetime.now().timestamp()*1000)}"
@@ -106,6 +118,9 @@ class PornboxSpider(BaseSpider):
        # 处理分页
        if current_page < total_pages:
            next_page = current_page + 1
            if self.debug and current_page >= 5:
                pass
            else:
                next_url = f"https://pornbox.com/studio/list/ppd?page={next_page}&sort=popular"
                yield scrapy.Request(next_url, callback=self.parse_studios_list)