modify scripts

2025-07-06 19:15:01 +08:00
parent 1ab4d55483
commit 1224ea3efe
6 changed files with 2 additions and 1 deletion
--- a/scrapy_proj/pbox/1.json
+++ b/scrapy_proj/pbox/1.json
--- a/scrapy_proj/pbox/2.json
+++ b/scrapy_proj/pbox/2.json
--- a/scrapy_proj/pbox/3.json
+++ b/scrapy_proj/pbox/3.json
--- a/scrapy_proj/pbox/4.json
+++ b/scrapy_proj/pbox/4.json
--- a/scrapy_proj/pbox/5.json
+++ b/scrapy_proj/pbox/5.json
--- a/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py
+++ b/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py
@@ -90,8 +90,8 @@ class PornboxSpider(BaseSpider):
        self.logger.info(f"url: {response.url}, total: {total_pages}, items: {len(data.get('items', []))}")
        '''
        # 由于 item 中不包含页码信息，我们需要从 spider 的属性中获取
        # 注意：这种方法依赖于 spider 中保存了当前页码
        json_dir = './pbox'
        os.makedirs(json_dir, exist_ok=True)
        file_path = os.path.join(json_dir, f"{current_page}.json")
@@ -100,6 +100,7 @@ class PornboxSpider(BaseSpider):
        with open(file_path, 'w', encoding='utf-8') as f:
            pass
            #json.dump(data, f, ensure_ascii=False, indent=2)
        '''
        # 处理每个工作室项目
        for item in data.get('items', []):