modify scripts

This commit is contained in:
oscarz
2025-04-02 08:32:21 +08:00
parent 5fcbfce6da
commit be9850dfc0
7 changed files with 96 additions and 29 deletions

View File

@ -242,7 +242,7 @@ def fetch_performers_detail_once(perfomers_list):
for performer in perfomers_list: for performer in perfomers_list:
url = performer['href'] url = performer['href']
person = performer['name'] person = performer['name']
logging.info(f"Fetching data for performer ({person}), url {url} ...") logging.debug(f"Fetching data for performer ({person}), url {url} ...")
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="headshot", attr_type="id")) soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="headshot", attr_type="id"))
if soup: if soup:
data = scraper.parse_page_performer(soup) data = scraper.parse_page_performer(soup)

View File

@ -1,27 +0,0 @@
import logging
import os
import inspect
from datetime import datetime
# 映射到宿主机的目录
home_dir = os.path.expanduser("~")
global_host_data_dir = f'{home_dir}/hostdir/scripts_data/thelordofporn'
# 设置日志配置
def setup_logging(log_filename=None):
# 如果未传入 log_filename则使用当前脚本名称作为日志文件名
if log_filename is None:
# 获取调用 setup_logging 的脚本文件名
caller_frame = inspect.stack()[1]
caller_filename = os.path.splitext(os.path.basename(caller_frame.filename))[0]
# 获取当前日期,格式为 yyyymmdd
current_date = datetime.now().strftime('%Y%m%d')
# 拼接 log 文件名,将日期加在扩展名前
log_filename = f'./log/{caller_filename}_{current_date}.log'
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] (%(funcName)s) - %(message)s',
handlers=[
logging.FileHandler(log_filename),
logging.StreamHandler()
])

View File

@ -0,0 +1,91 @@
import logging
import os
import inspect
import time
from datetime import datetime
from logging.handlers import RotatingFileHandler
from collections import defaultdict
# 映射到宿主机的目录
home_dir = os.path.expanduser("~")
global_host_data_dir = f'{home_dir}/hostdir/scripts_data/thelordofporn'
global_share_data_dir = f'{home_dir}/sharedata'
# 统计日志频率
log_count = defaultdict(int) # 记录日志的次数
last_log_time = defaultdict(float) # 记录上次写入的时间戳
log_dir = '../log'
class RateLimitFilter(logging.Filter):
"""
频率限制过滤器:
1. 在 60 秒内,同样的日志最多写入 60 次,超过则忽略
2. 如果日志速率超过 100 条/秒,发出告警
"""
LOG_LIMIT = 60 # 每分钟最多记录相同消息 10 次
def filter(self, record):
global log_count, last_log_time
message_key = record.getMessage() # 获取日志内容
# 计算当前时间
now = time.time()
elapsed = now - last_log_time[message_key]
# 限制相同日志的写入频率
if elapsed < 60: # 60 秒内
log_count[message_key] += 1
if log_count[message_key] > self.LOG_LIMIT:
print('reach limit.')
return False # 直接丢弃
else:
log_count[message_key] = 1 # 超过 60 秒,重新计数
last_log_time[message_key] = now
return True # 允许写入日志
def setup_logging(log_filename=None):
if log_filename is None:
caller_frame = inspect.stack()[1]
caller_filename = os.path.splitext(os.path.basename(caller_frame.filename))[0]
current_date = datetime.now().strftime('%Y%m%d')
os.makedirs(log_dir, exist_ok=True)
log_filename = f'{log_dir}/{caller_filename}_{current_date}.log'
#log_filename = f'../log/{caller_filename}_{current_date}.log'
max_log_size = 100 * 1024 * 1024 # 10 MB
max_log_files = 10 # 最多保留 10 个日志文件
file_handler = RotatingFileHandler(log_filename, maxBytes=max_log_size, backupCount=max_log_files)
file_handler.setFormatter(logging.Formatter(
'%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] (%(funcName)s) - %(message)s'
))
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(
'%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] (%(funcName)s) - %(message)s'
))
# 创建 logger
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [] # 避免重复添加 handler
logger.addHandler(file_handler)
logger.addHandler(console_handler)
# 添加频率限制
rate_limit_filter = RateLimitFilter()
file_handler.addFilter(rate_limit_filter)
console_handler.addFilter(rate_limit_filter)
# 运行示例
if __name__ == "__main__":
setup_logging()
for i in range(1000):
logging.info("测试日志,检测频率限制")
time.sleep(0.01) # 模拟快速写入日志

View File

@ -3,11 +3,14 @@ import json
import re import re
import logging import logging
from datetime import datetime from datetime import datetime
import config
def setup_logging(): def setup_logging():
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
db_path = "/root/sharedata/shared.db" #db_path = "/root/sharedata/shared.db"
# 连接 SQLite 数据库
db_path = f"{config.global_share_data_dir}/sqlite/shared.db" # 替换为你的数据库文件
def connect_db(db_name=db_path): def connect_db(db_name=db_path):
return sqlite3.connect(db_name) return sqlite3.connect(db_name)