From 3a7bd045d0a76612a6dac36e8af55b083bda2cf9 Mon Sep 17 00:00:00 2001
From: sophon
Date: Fri, 18 Jul 2025 21:03:12 +0800
Subject: [PATCH] modify scripts

---
 .../db_wapper/spider_db_handler.py            | 37 +++++++++++++++++++
 scrapy_proj/scrapy_proj/spiders/clm_spider.py | 13 +++++++
 2 files changed, 50 insertions(+)

diff --git a/scrapy_proj/scrapy_proj/db_wapper/spider_db_handler.py b/scrapy_proj/scrapy_proj/db_wapper/spider_db_handler.py
index ec22c71..a1eb47d 100644
--- a/scrapy_proj/scrapy_proj/db_wapper/spider_db_handler.py
+++ b/scrapy_proj/scrapy_proj/db_wapper/spider_db_handler.py
@@ -117,6 +117,7 @@ class IAFDDBHandler(SQLiteDBHandler):
         self.tbl_name_performers = 'iafd_performers'
         self.tbl_name_movies = 'iafd_movies'
         self.uniq_key = 'href'
+        self.tbl_name_thelordofporn_actress = 'thelordofporn_actress'
 
     def insert_item(self, item):
         pass
@@ -213,6 +214,42 @@ class IAFDDBHandler(SQLiteDBHandler):
 
         return None
 
+    # Query actress rows (href, name, id) filtered by the given keyword arguments
+    def get_lord_actors(self, **filters):
+        try:
+            sql = f"SELECT href, pornstar AS name, id FROM {self.tbl_name_thelordofporn_actress} WHERE 1=1"
+            params = []
+
+            conditions = {
+                "id": " AND id = ?",
+                "href": " AND href = ?",
+                "pornstar": " AND pornstar LIKE ?",
+                "start_id": " AND id > ?",
+            }
+
+            for key, condition in conditions.items():
+                if key in filters:
+                    sql += condition
+                    if key == "pornstar":
+                        params.append(f"%{filters[key]}%")
+                    else:
+                        params.append(filters[key])
+
+            if "order_by" in filters:
+                # Note: the column name follows ORDER BY directly; a '?' placeholder cannot be used here, otherwise it would be bound as a string literal
+                sql += f" ORDER BY {filters['order_by']}"
+
+            if "limit" in filters:
+                sql += " LIMIT ?"
+                params.append(filters["limit"])
+
+            self.cursor.execute(sql, params)
+            return [dict(row) for row in self.cursor.fetchall()]
+        except sqlite3.Error as e:
+            logging.error(f"Failed to query hrefs: {e}")
+            return None
+
+
 @register_handler(comm.SPIDER_NAME_PBOX)
 class PboxDBHandler(SQLiteDBHandler):
     def __init__(self, db_path=shared_db_path):
diff --git a/scrapy_proj/scrapy_proj/spiders/clm_spider.py b/scrapy_proj/scrapy_proj/spiders/clm_spider.py
index 500ddb5..6213a1f 100644
--- a/scrapy_proj/scrapy_proj/spiders/clm_spider.py
+++ b/scrapy_proj/scrapy_proj/spiders/clm_spider.py
@@ -5,6 +5,9 @@ from scrapy_proj.utils.utils import parse_size, parse_date_to_datetime
 from scrapy_proj.spiders.base_spider import BaseSpider, extract_text_from_element
 from scrapy_proj.items import ClmIndexItem, ClmKeyWordsItem
 from scrapy_proj.comm.comm_def import SPIDER_NAME_CLM, ITEM_TYPE_CLM_INDEX, ITEM_TYPE_CLM_KEYWORDS
+from scrapy_proj.db_wapper.spider_db_handler import IAFDDBHandler
+
+db_tools = IAFDDBHandler()
 
 default_keywords = [
     'vixen', 'tushy', 'tushyraw', 'blacked', 'blackedraw', 'deeper',  # vixen group
@@ -56,6 +59,16 @@ class ClmSpider(BaseSpider):
         item['words'] = self.keywords if self.keywords else 'default keywords'
         yield item
 
+        if self.debug:
+            actors = db_tools.get_lord_actors(limit=5)
+        else:
+            actors = db_tools.get_lord_actors()
+        if actors:
+            for row in actors:
+                list_words.append(row['name'])
+        else:
+            self.logger.warning("get_lord_actors returned no actors.")
+
         for item in list_words:
             encoded_keyword = quote_plus(item.strip())
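
The get_lord_actors() addition builds its WHERE clause dynamically from keyword arguments and binds every value through '?' placeholders. Below is a minimal, standalone sketch of that construction so the resulting SQL and parameter list can be inspected without a database; build_lord_actors_query() is a hypothetical helper that only mirrors the patched method, with the table name taken from the patch.

def build_lord_actors_query(**filters):
    """Mirror the dynamic filter-to-SQL mapping added in get_lord_actors()."""
    sql = "SELECT href, pornstar AS name, id FROM thelordofporn_actress WHERE 1=1"
    params = []

    conditions = {
        "id": " AND id = ?",
        "href": " AND href = ?",
        "pornstar": " AND pornstar LIKE ?",   # value gets surrounding % wildcards
        "start_id": " AND id > ?",
    }

    for key, condition in conditions.items():
        if key in filters:
            sql += condition
            params.append(f"%{filters[key]}%" if key == "pornstar" else filters[key])

    if "order_by" in filters:
        # Identifier, not a value: it cannot go through a '?' placeholder.
        sql += f" ORDER BY {filters['order_by']}"

    if "limit" in filters:
        sql += " LIMIT ?"
        params.append(filters["limit"])

    return sql, params


if __name__ == "__main__":
    # What the spider's debug branch effectively requests: a 5-row sample.
    print(build_lord_actors_query(limit=5))
    # A filtered, ordered lookup combining several optional conditions.
    print(build_lord_actors_query(pornstar="ana", start_id=100, order_by="id", limit=10))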
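
The in-method note about ORDER BY is accurate: SQLite placeholders bind values, not identifiers, so a bound column name would be compared as a constant string. Because filters['order_by'] is interpolated into the statement, a common guard, not part of this patch, is to whitelist the column name against the columns the query actually exposes; a sketch under that assumption:

# Hypothetical guard for the ORDER BY identifier; not part of the patch.
# '?' placeholders bind values only, so the column name must be interpolated,
# and restricting it to known columns avoids SQL injection via order_by.
ALLOWED_ORDER_COLUMNS = {"id", "href", "pornstar"}

def append_order_by(sql, order_by):
    if order_by not in ALLOWED_ORDER_COLUMNS:
        raise ValueError(f"unsupported order_by column: {order_by!r}")
    return f"{sql} ORDER BY {order_by}"

# Example: append_order_by("SELECT ... WHERE 1=1", "id") -> "SELECT ... WHERE 1=1 ORDER BY id"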
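
get_lord_actors() returns [dict(row) for row in self.cursor.fetchall()], and the spider then reads row['name']; both only work when the cursor yields sqlite3.Row objects rather than plain tuples. If the base SQLiteDBHandler does not already configure that (it is not visible in this patch), setting the row factory on the connection is enough; the path below is illustrative only.

import sqlite3

# Assumption: the handler owns a plain sqlite3 connection; "shared.db" stands in
# for the real shared_db_path used by the handlers.
conn = sqlite3.connect("shared.db")
conn.row_factory = sqlite3.Row   # rows now support dict(row) and row['name']
cursor = conn.cursor()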