diff --git a/scrapy_proj/pbox/1.json b/scrapy_proj/pbox/1.json deleted file mode 100644 index e69de29..0000000 diff --git a/scrapy_proj/pbox/2.json b/scrapy_proj/pbox/2.json deleted file mode 100644 index e69de29..0000000 diff --git a/scrapy_proj/pbox/3.json b/scrapy_proj/pbox/3.json deleted file mode 100644 index e69de29..0000000 diff --git a/scrapy_proj/pbox/4.json b/scrapy_proj/pbox/4.json deleted file mode 100644 index e69de29..0000000 diff --git a/scrapy_proj/pbox/5.json b/scrapy_proj/pbox/5.json deleted file mode 100644 index e69de29..0000000 diff --git a/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py b/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py index b6f68f7..6ea33d7 100644 --- a/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py +++ b/scrapy_proj/scrapy_proj/spiders/pornbox_spider.py @@ -90,8 +90,8 @@ class PornboxSpider(BaseSpider): self.logger.info(f"url: {response.url}, total: {total_pages}, items: {len(data.get('items', []))}") + ''' # 由于 item 中不包含页码信息,我们需要从 spider 的属性中获取 - # 注意:这种方法依赖于 spider 中保存了当前页码 json_dir = './pbox' os.makedirs(json_dir, exist_ok=True) file_path = os.path.join(json_dir, f"{current_page}.json") @@ -100,6 +100,7 @@ class PornboxSpider(BaseSpider): with open(file_path, 'w', encoding='utf-8') as f: pass #json.dump(data, f, ensure_ascii=False, indent=2) + ''' # 处理每个工作室项目 for item in data.get('items', []):