From 1ab4d55483109379835925e9d060e54ffb0e4340 Mon Sep 17 00:00:00 2001
From: oscarz <oscar@vip.qq.com>
Date: Sun, 6 Jul 2025 19:12:51 +0800
Subject: [PATCH] pornbox_spider: gate crawl stages by cmd list, cap debug pagination

---
 scrapy_proj/pbox/1.json                       |  0
 scrapy_proj/pbox/2.json                       |  0
 scrapy_proj/pbox/3.json                       |  0
 scrapy_proj/pbox/4.json                       |  0
 scrapy_proj/pbox/5.json                       |  0
 .../scrapy_proj/spiders/pornbox_spider.py     | 33 ++++++++++++++-----
 6 files changed, 24 insertions(+), 9 deletions(-)
 create mode 100644 scrapy_proj/pbox/1.json
 create mode 100644 scrapy_proj/pbox/2.json
 create mode 100644 scrapy_proj/pbox/3.json
 create mode 100644 scrapy_proj/pbox/4.json
 create mode 100644 scrapy_proj/pbox/5.json

diff --git a/scrapy_proj/pbox/1.json b/scrapy_proj/pbox/1.json
new file mode 100644
index 0000000..e69de29
diff --git a/scrapy_proj/pbox/2.json b/scrapy_proj/pbox/2.json
new file mode 100644
index 0000000..e69de29
diff --git a/scrapy_proj/pbox/3.json b/scrapy_proj/pbox/3.json
new file mode 100644
index 0000000..e69de29
diff --git a/scrapy_proj/pbox/4.json b/scrapy_proj/pbox/4.json
new file mode 100644
index 0000000..e69de29
diff --git a/scrapy_proj/pbox/5.json b/scrapy_proj/pbox/5.json
new file mode 100644
index 0000000..e69de29
diff --git a/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py b/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py
index 42aae60..b6f68f7 100644
--- a/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py
+++ b/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py
@@ -44,18 +44,30 @@ class PornboxSpider(BaseSpider):
         self.update = int(update)
         self.logger.info(f"debug mod: {self.debug}, cmd: {self.cmd_str}, update: {self.update}")
 
+        self.cmd_studio = 'studio'
+        self.cmd_movie = 'movies'
+        self.cmd_actors = 'actors'
+        # ''.split(',') is [''], never [], so blank tokens are dropped before falling back to all commands.
+        self.cmd_list = [c.strip() for c in self.cmd_str.split(',') if c.strip()] or [
+            self.cmd_studio, self.cmd_movie, self.cmd_actors]
+
     # 入口函数，由基类的方法触发
     def custom_start_requests(self):
         # studios 列表
-        url = "https://pornbox.com/studio/list/ppd?page=1&sort=popular"
-        #yield scrapy.Request(url, callback=self.parse_studios_list)
+        if self.cmd_studio in self.cmd_list:
+            listing_url = "https://pornbox.com/studio/list/ppd?page=1&sort=popular"
+            yield scrapy.Request(listing_url, callback=self.parse_studios_list)
 
         # 获取每个stutio, 获取详情
-        stu_list = db_tools.get_studios(limit=1 if self.debug else 100)
-        for stu in stu_list:
-            stu_url = f"https://pornbox.com/studio/{stu['label_id']}"
-            url = f"{stu_url}/?skip=1&sort=recent&_={int(datetime.now().timestamp()*1000)}"
-            yield scrapy.Request(url, callback=self.parse_studio, meta={'sdu_href':stu_url})
+        if self.cmd_movie in self.cmd_list:
+            # Debug runs pass limit=5 to the studio query; otherwise
+            # db_tools.get_studios is called with no keyword arguments.
+            query_args = {'limit': 5} if self.debug else {}
+            studios = db_tools.get_studios(**query_args)
+            for studio in studios:
+                studio_url = f"https://pornbox.com/studio/{studio['label_id']}"
+                url = f"{studio_url}/?skip=1&sort=recent&_={int(datetime.now().timestamp()*1000)}"
+                yield scrapy.Request(url, callback=self.parse_studio, meta={'sdu_href': studio_url})
 
 
     def parse_studios_list(self, response):
@@ -106,8 +118,11 @@ class PornboxSpider(BaseSpider):
         # 处理分页
         if current_page < total_pages:
             next_page = current_page + 1
-            next_url = f"https://pornbox.com/studio/list/ppd?page={next_page}&sort=popular"
-            yield scrapy.Request(next_url, callback=self.parse_studios_list)
+            # Debug mode stops following the listing once page 5 has been parsed;
+            # in every other case the next listing page is requested.
+            if not (self.debug and current_page >= 5):
+                next_url = f"https://pornbox.com/studio/list/ppd?page={next_page}&sort=popular"
+                yield scrapy.Request(next_url, callback=self.parse_studios_list)
 
 
     def parse_studio(self, response):