modify scripts

2025-07-06 19:12:51 +08:00
parent b04c43f0ab
commit 1ab4d55483
6 changed files with 24 additions and 9 deletions
--- a/scrapy_proj/pbox/1.json
+++ b/scrapy_proj/pbox/1.json
--- a/scrapy_proj/pbox/2.json
+++ b/scrapy_proj/pbox/2.json
--- a/scrapy_proj/pbox/3.json
+++ b/scrapy_proj/pbox/3.json
--- a/scrapy_proj/pbox/4.json
+++ b/scrapy_proj/pbox/4.json
--- a/scrapy_proj/pbox/5.json
+++ b/scrapy_proj/pbox/5.json
--- a/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py
+++ b/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py
@@ -44,18 +44,30 @@ class PornboxSpider(BaseSpider):
        self.update = int(update)
        self.logger.info(f"debug mod: {self.debug}, cmd: {self.cmd_str}, update: {self.update}")

+        self.cmd_studio = 'studio'
+        self.cmd_movie = 'movies'
+        self.cmd_actors = 'actors'
+        self.cmd_list = self.cmd_str.split(',')
+        if len(self.cmd_list) == 0 :
+            self.cmd_list = [self.cmd_studio, self.cmd_movie, self.cmd_actors]
+
    # 入口函数，由基类的方法触发
    def custom_start_requests(self):
        # studios 列表
-        url = "https://pornbox.com/studio/list/ppd?page=1&sort=popular"
-        #yield scrapy.Request(url, callback=self.parse_studios_list)
+        if self.cmd_studio in self.cmd_list:
+            url = "https://pornbox.com/studio/list/ppd?page=1&sort=popular"
+            yield scrapy.Request(url, callback=self.parse_studios_list)

        # 获取每个stutio, 获取详情
-        stu_list = db_tools.get_studios(limit=1 if self.debug else 100)
-        for stu in stu_list:
-            stu_url = f"https://pornbox.com/studio/{stu['label_id']}"
-            url = f"{stu_url}/?skip=1&sort=recent&_={int(datetime.now().timestamp()*1000)}"
-            yield scrapy.Request(url, callback=self.parse_studio, meta={'sdu_href':stu_url})
+        if self.cmd_movie in self.cmd_list:
+            fitlers= {}
+            if self.debug :
+                fitlers['limit'] = 5
+            stu_list = db_tools.get_studios(**fitlers)
+            for stu in stu_list:
+                stu_url = f"https://pornbox.com/studio/{stu['label_id']}"
+                url = f"{stu_url}/?skip=1&sort=recent&_={int(datetime.now().timestamp()*1000)}"
+                yield scrapy.Request(url, callback=self.parse_studio, meta={'sdu_href':stu_url})


    def parse_studios_list(self, response):
@@ -106,8 +118,11 @@ class PornboxSpider(BaseSpider):
        # 处理分页
        if current_page < total_pages:
            next_page = current_page + 1
-            next_url = f"https://pornbox.com/studio/list/ppd?page={next_page}&sort=popular"
-            yield scrapy.Request(next_url, callback=self.parse_studios_list)
+            if self.debug and current_page >= 5:
+                pass
+            else:
+                next_url = f"https://pornbox.com/studio/list/ppd?page={next_page}&sort=popular"
+                yield scrapy.Request(next_url, callback=self.parse_studios_list)


    def parse_studio(self, response):