From be9850dfc0909d330472a31d7bafc503b795e9d6 Mon Sep 17 00:00:00 2001 From: oscarz Date: Wed, 2 Apr 2025 08:32:21 +0800 Subject: [PATCH] modify scripts --- iafd/src/fetch.py | 2 +- thelordofporn/config.py | 27 ------ thelordofporn/{ => src}/actress_fetch.py | 0 thelordofporn/src/config.py | 91 +++++++++++++++++++ thelordofporn/{tools.py => src/json_to_db.py} | 5 +- thelordofporn/{ => src}/list_fetch.py | 0 thelordofporn/{ => src}/top_scenes.py | 0 7 files changed, 96 insertions(+), 29 deletions(-) delete mode 100644 thelordofporn/config.py rename thelordofporn/{ => src}/actress_fetch.py (100%) create mode 100644 thelordofporn/src/config.py rename thelordofporn/{tools.py => src/json_to_db.py} (97%) rename thelordofporn/{ => src}/list_fetch.py (100%) rename thelordofporn/{ => src}/top_scenes.py (100%) diff --git a/iafd/src/fetch.py b/iafd/src/fetch.py index 7552182..02ede02 100644 --- a/iafd/src/fetch.py +++ b/iafd/src/fetch.py @@ -242,7 +242,7 @@ def fetch_performers_detail_once(perfomers_list): for performer in perfomers_list: url = performer['href'] person = performer['name'] - logging.info(f"Fetching data for performer ({person}), url {url} ...") + logging.debug(f"Fetching data for performer ({person}), url {url} ...") soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="headshot", attr_type="id")) if soup: data = scraper.parse_page_performer(soup) diff --git a/thelordofporn/config.py b/thelordofporn/config.py deleted file mode 100644 index 2a14298..0000000 --- a/thelordofporn/config.py +++ /dev/null @@ -1,27 +0,0 @@ -import logging -import os -import inspect -from datetime import datetime - -# 映射到宿主机的目录 -home_dir = os.path.expanduser("~") -global_host_data_dir = f'{home_dir}/hostdir/scripts_data/thelordofporn' - -# 设置日志配置 -def setup_logging(log_filename=None): - # 如果未传入 log_filename,则使用当前脚本名称作为日志文件名 - if log_filename is None: - # 获取调用 setup_logging 的脚本文件名 - caller_frame = inspect.stack()[1] - caller_filename = os.path.splitext(os.path.basename(caller_frame.filename))[0] - - # 获取当前日期,格式为 yyyymmdd - current_date = datetime.now().strftime('%Y%m%d') - # 拼接 log 文件名,将日期加在扩展名前 - log_filename = f'./log/{caller_filename}_{current_date}.log' - - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] (%(funcName)s) - %(message)s', - handlers=[ - logging.FileHandler(log_filename), - logging.StreamHandler() - ]) \ No newline at end of file diff --git a/thelordofporn/actress_fetch.py b/thelordofporn/src/actress_fetch.py similarity index 100% rename from thelordofporn/actress_fetch.py rename to thelordofporn/src/actress_fetch.py diff --git a/thelordofporn/src/config.py b/thelordofporn/src/config.py new file mode 100644 index 0000000..5e637bc --- /dev/null +++ b/thelordofporn/src/config.py @@ -0,0 +1,91 @@ +import logging +import os +import inspect +import time +from datetime import datetime +from logging.handlers import RotatingFileHandler +from collections import defaultdict + +# 映射到宿主机的目录 +home_dir = os.path.expanduser("~") +global_host_data_dir = f'{home_dir}/hostdir/scripts_data/thelordofporn' +global_share_data_dir = f'{home_dir}/sharedata' + +# 统计日志频率 +log_count = defaultdict(int) # 记录日志的次数 +last_log_time = defaultdict(float) # 记录上次写入的时间戳 + +log_dir = '../log' +class RateLimitFilter(logging.Filter): + """ + 频率限制过滤器: + 1. 在 60 秒内,同样的日志最多写入 60 次,超过则忽略 + 2. 如果日志速率超过 100 条/秒,发出告警 + """ + LOG_LIMIT = 60 # 每分钟最多记录相同消息 10 次 + + def filter(self, record): + global log_count, last_log_time + message_key = record.getMessage() # 获取日志内容 + + # 计算当前时间 + now = time.time() + elapsed = now - last_log_time[message_key] + + # 限制相同日志的写入频率 + if elapsed < 60: # 60 秒内 + log_count[message_key] += 1 + if log_count[message_key] > self.LOG_LIMIT: + print('reach limit.') + return False # 直接丢弃 + else: + log_count[message_key] = 1 # 超过 60 秒,重新计数 + + last_log_time[message_key] = now + + return True # 允许写入日志 + + + +def setup_logging(log_filename=None): + if log_filename is None: + caller_frame = inspect.stack()[1] + caller_filename = os.path.splitext(os.path.basename(caller_frame.filename))[0] + current_date = datetime.now().strftime('%Y%m%d') + os.makedirs(log_dir, exist_ok=True) + log_filename = f'{log_dir}/{caller_filename}_{current_date}.log' + #log_filename = f'../log/{caller_filename}_{current_date}.log' + + max_log_size = 100 * 1024 * 1024 # 10 MB + max_log_files = 10 # 最多保留 10 个日志文件 + + file_handler = RotatingFileHandler(log_filename, maxBytes=max_log_size, backupCount=max_log_files) + file_handler.setFormatter(logging.Formatter( + '%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] (%(funcName)s) - %(message)s' + )) + + console_handler = logging.StreamHandler() + console_handler.setFormatter(logging.Formatter( + '%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] (%(funcName)s) - %(message)s' + )) + + # 创建 logger + logger = logging.getLogger() + logger.setLevel(logging.INFO) + logger.handlers = [] # 避免重复添加 handler + logger.addHandler(file_handler) + logger.addHandler(console_handler) + + # 添加频率限制 + rate_limit_filter = RateLimitFilter() + file_handler.addFilter(rate_limit_filter) + console_handler.addFilter(rate_limit_filter) + + +# 运行示例 +if __name__ == "__main__": + setup_logging() + + for i in range(1000): + logging.info("测试日志,检测频率限制") + time.sleep(0.01) # 模拟快速写入日志 \ No newline at end of file diff --git a/thelordofporn/tools.py b/thelordofporn/src/json_to_db.py similarity index 97% rename from thelordofporn/tools.py rename to thelordofporn/src/json_to_db.py index 9ef09f1..90f63d4 100644 --- a/thelordofporn/tools.py +++ b/thelordofporn/src/json_to_db.py @@ -3,11 +3,14 @@ import json import re import logging from datetime import datetime +import config def setup_logging(): logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') -db_path = "/root/sharedata/shared.db" +#db_path = "/root/sharedata/shared.db" +# 连接 SQLite 数据库 +db_path = f"{config.global_share_data_dir}/sqlite/shared.db" # 替换为你的数据库文件 def connect_db(db_name=db_path): return sqlite3.connect(db_name) diff --git a/thelordofporn/list_fetch.py b/thelordofporn/src/list_fetch.py similarity index 100% rename from thelordofporn/list_fetch.py rename to thelordofporn/src/list_fetch.py diff --git a/thelordofporn/top_scenes.py b/thelordofporn/src/top_scenes.py similarity index 100% rename from thelordofporn/top_scenes.py rename to thelordofporn/src/top_scenes.py