modify scripts
This commit is contained in:
@ -117,6 +117,7 @@ class IAFDDBHandler(SQLiteDBHandler):
|
|||||||
self.tbl_name_performers = 'iafd_performers'
|
self.tbl_name_performers = 'iafd_performers'
|
||||||
self.tbl_name_movies = 'iafd_movies'
|
self.tbl_name_movies = 'iafd_movies'
|
||||||
self.uniq_key = 'href'
|
self.uniq_key = 'href'
|
||||||
|
self.tbl_name_thelordofporn_actress = 'thelordofporn_actress'
|
||||||
|
|
||||||
def insert_item(self, item):
|
def insert_item(self, item):
|
||||||
pass
|
pass
|
||||||
@ -213,6 +214,42 @@ class IAFDDBHandler(SQLiteDBHandler):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_lord_actors(self, **filters):
    """Query rows from the thelordofporn actress table, optionally filtered.

    Supported keyword filters (all optional):
        id: exact match on the ``id`` column.
        href: exact match on the ``href`` column.
        pornstar: substring match on ``pornstar`` (value is wrapped in ``%``).
        start_id: only rows whose ``id`` is strictly greater than this value.
        order_by: comma-separated column names, each optionally followed by
            ``ASC`` or ``DESC`` (case-insensitive), e.g. ``"id DESC"``.
        limit: maximum number of rows to return.

    Returns:
        A list of dicts with the keys ``href``, ``name`` (the ``pornstar``
        column) and ``id``. ``None`` is returned when the query raises
        ``sqlite3.Error`` or when ``order_by`` is rejected as unsafe.
    """
    # Local import so this method does not depend on a file-level `import re`.
    import re

    sql = f"SELECT href, pornstar as name, id FROM {self.tbl_name_thelordofporn_actress} WHERE 1=1"
    params = []

    conditions = {
        "id": " AND id = ?",
        "href": " AND href = ?",
        "pornstar": " AND pornstar LIKE ?",
        "start_id": " AND id > ?",
    }

    for key, condition in conditions.items():
        if key in filters:
            sql += condition
            if key == "pornstar":
                params.append(f"%{filters[key]}%")
            else:
                params.append(filters[key])

    if "order_by" in filters:
        # ORDER BY takes identifiers, which cannot be bound through a `?`
        # placeholder (a bound value would be treated as a string literal).
        # Because the text has to be spliced into the statement, accept only
        # plain column names with an optional sort direction so that a
        # caller-supplied value can never inject arbitrary SQL.
        order_by = filters["order_by"]
        terms = order_by.split(",") if isinstance(order_by, str) else []
        term_pattern = re.compile(
            r"[A-Za-z_][A-Za-z0-9_]*(\s+(ASC|DESC))?", re.IGNORECASE
        )
        cleaned = [term.strip() for term in terms]
        if not cleaned or not all(term_pattern.fullmatch(term) for term in cleaned):
            logging.error("Rejected unsafe order_by clause: %r", order_by)
            return None
        sql += " ORDER BY " + ", ".join(cleaned) + " "

    if "limit" in filters:
        sql += " LIMIT ?"
        params.append(filters["limit"])

    try:
        self.cursor.execute(sql, params)
        return [dict(row) for row in self.cursor.fetchall()]
    except sqlite3.Error as e:
        logging.error(f"查询 href 失败: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
@register_handler(comm.SPIDER_NAME_PBOX)
|
@register_handler(comm.SPIDER_NAME_PBOX)
|
||||||
class PboxDBHandler(SQLiteDBHandler):
|
class PboxDBHandler(SQLiteDBHandler):
|
||||||
def __init__(self, db_path=shared_db_path):
|
def __init__(self, db_path=shared_db_path):
|
||||||
|
|||||||
@ -5,6 +5,9 @@ from scrapy_proj.utils.utils import parse_size, parse_date_to_datetime
|
|||||||
from scrapy_proj.spiders.base_spider import BaseSpider, extract_text_from_element
|
from scrapy_proj.spiders.base_spider import BaseSpider, extract_text_from_element
|
||||||
from scrapy_proj.items import ClmIndexItem, ClmKeyWordsItem
|
from scrapy_proj.items import ClmIndexItem, ClmKeyWordsItem
|
||||||
from scrapy_proj.comm.comm_def import SPIDER_NAME_CLM, ITEM_TYPE_CLM_INDEX, ITEM_TYPE_CLM_KEYWORDS
|
from scrapy_proj.comm.comm_def import SPIDER_NAME_CLM, ITEM_TYPE_CLM_INDEX, ITEM_TYPE_CLM_KEYWORDS
|
||||||
|
from scrapy_proj.db_wapper.spider_db_handler import IAFDDBHandler
|
||||||
|
|
||||||
|
db_tools = IAFDDBHandler()
|
||||||
|
|
||||||
default_keywords = [
|
default_keywords = [
|
||||||
'vixen', 'tushy', 'tushyraw', 'blacked', 'blackedraw', 'deeper', # vixen group
|
'vixen', 'tushy', 'tushyraw', 'blacked', 'blackedraw', 'deeper', # vixen group
|
||||||
@ -56,6 +59,16 @@ class ClmSpider(BaseSpider):
|
|||||||
item['words'] = self.keywords if self.keywords else 'default keywords'
|
item['words'] = self.keywords if self.keywords else 'default keywords'
|
||||||
yield item
|
yield item
|
||||||
|
|
||||||
|
if self.debug:
|
||||||
|
actors = db_tools.get_lord_actors(limit = 5)
|
||||||
|
else:
|
||||||
|
actors = db_tools.get_lord_actors()
|
||||||
|
if actors:
|
||||||
|
for row in actors:
|
||||||
|
list_words.append(row['name'])
|
||||||
|
else:
|
||||||
|
self.logger.warning(f"get_lord_actors error.")
|
||||||
|
|
||||||
for item in list_words:
|
for item in list_words:
|
||||||
encoded_keyword = quote_plus(item.strip())
|
encoded_keyword = quote_plus(item.strip())
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user