modify scripts
This commit is contained in:
@ -242,7 +242,7 @@ def fetch_performers_detail_once(perfomers_list):
|
|||||||
for performer in perfomers_list:
|
for performer in perfomers_list:
|
||||||
url = performer['href']
|
url = performer['href']
|
||||||
person = performer['name']
|
person = performer['name']
|
||||||
logging.info(f"Fetching data for performer ({person}), url {url} ...")
|
logging.debug(f"Fetching data for performer ({person}), url {url} ...")
|
||||||
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="headshot", attr_type="id"))
|
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="headshot", attr_type="id"))
|
||||||
if soup:
|
if soup:
|
||||||
data = scraper.parse_page_performer(soup)
|
data = scraper.parse_page_performer(soup)
|
||||||
|
|||||||
@ -1,27 +0,0 @@
|
|||||||
import logging
|
|
||||||
import os
|
|
||||||
import inspect
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
# Directory that is mapped onto the host machine.
home_dir = os.path.expanduser("~")
global_host_data_dir = home_dir + '/hostdir/scripts_data/thelordofporn'
|
|
||||||
|
|
||||||
# 设置日志配置
|
|
||||||
def setup_logging(log_filename=None):
    """Configure the root logger to write to a log file and to the console.

    Args:
        log_filename: Path of the log file. When omitted, the file is named
            after the script that called this function plus the current
            date: ``./log/<script>_<yyyymmdd>.log``.

    Note:
        ``logging.basicConfig`` leaves the root logger untouched when it
        already has handlers, so only the first call in a process takes
        effect.
    """
    if log_filename is None:
        # Name the log after the script that called setup_logging().
        caller_frame = inspect.stack()[1]
        caller_filename = os.path.splitext(os.path.basename(caller_frame.filename))[0]

        # Current date as yyyymmdd, placed just before the extension.
        current_date = datetime.now().strftime('%Y%m%d')
        log_filename = f'./log/{caller_filename}_{current_date}.log'

    # FileHandler opens the file as soon as it is constructed, so a missing
    # directory would raise FileNotFoundError; create it up front.
    parent_dir = os.path.dirname(log_filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] (%(funcName)s) - %(message)s',
        handlers=[
            logging.FileHandler(log_filename),
            logging.StreamHandler(),
        ],
    )
|
|
||||||
91
thelordofporn/src/config.py
Normal file
91
thelordofporn/src/config.py
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import inspect
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
from logging.handlers import RotatingFileHandler
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
# Directories that are mapped onto the host machine.
home_dir = os.path.expanduser("~")
global_host_data_dir = home_dir + '/hostdir/scripts_data/thelordofporn'
global_share_data_dir = home_dir + '/sharedata'

# Bookkeeping for log-frequency tracking, keyed by message.
log_count = defaultdict(int)        # how many times each message was logged
last_log_time = defaultdict(float)  # timestamp of the last write per message

log_dir = '../log'
|
||||||
|
class RateLimitFilter(logging.Filter):
    """Drop log records whose message repeats too often.

    Each distinct formatted message is admitted at most ``LOG_LIMIT`` times
    per window of ``WINDOW_SECONDS`` seconds. The window opens with the first
    record carrying that message and is measured from that first record, so a
    message that merely recurs steadily below the limit is never suppressed.
    Further repeats inside a full window are dropped; the next record after
    the window has expired opens a fresh one.

    Counters are kept on the filter instance. A record is counted once per
    filter instance even when the same instance is attached to several
    handlers: the first verdict is cached on the record and reused.

    NOTE: one entry is retained per distinct message for the lifetime of the
    filter.
    """

    LOG_LIMIT = 60       # identical messages admitted per window
    WINDOW_SECONDS = 60  # length of one counting window, in seconds

    def __init__(self, name=''):
        super().__init__(name)
        # message text -> [window start timestamp, records seen in that window]
        self._windows = {}

    def filter(self, record):
        """Return True to let ``record`` through, False to discard it."""
        # Every handler this filter is attached to offers the same record
        # object; reuse the first verdict so the record is counted only once.
        verdict_attr = f'_rate_limit_verdict_{id(self)}'
        verdict = record.__dict__.get(verdict_attr)
        if verdict is None:
            verdict = self._admit(record.getMessage(), record.created)
            setattr(record, verdict_attr, verdict)
        return verdict

    def _admit(self, message_key, now):
        """Count one occurrence of ``message_key`` at time ``now``; return the verdict."""
        window = self._windows.get(message_key)
        if window is None or now - window[0] >= self.WINDOW_SECONDS:
            # First sighting, or the previous window expired: start counting anew.
            self._windows[message_key] = [now, 1]
            return True

        window[1] += 1
        if window[1] <= self.LOG_LIMIT:
            return True

        # Announce the suppression once per window instead of once per
        # dropped record, which would defeat the purpose of the limit.
        if window[1] == self.LOG_LIMIT + 1:
            print('reach limit.')
        return False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def setup_logging(log_filename=None):
    """Configure the root logger with a rotating file handler and a console handler.

    Args:
        log_filename: Path of the log file. When omitted, the file is placed
            in ``log_dir`` and named after the script that called this
            function plus the current date: ``<script>_<yyyymmdd>.log``.

    Any handlers already attached to the root logger are removed and closed
    first, so calling this function again does not duplicate output or leak
    open log files. Both new handlers share one formatter and one
    ``RateLimitFilter``.
    """
    if log_filename is None:
        # Name the log after the script that called setup_logging().
        caller_frame = inspect.stack()[1]
        caller_filename = os.path.splitext(os.path.basename(caller_frame.filename))[0]
        current_date = datetime.now().strftime('%Y%m%d')
        log_filename = f'{log_dir}/{caller_filename}_{current_date}.log'

    # The file handler opens its file on construction, so the directory must
    # exist for explicitly supplied paths as well as for the default one.
    parent_dir = os.path.dirname(log_filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    max_log_size = 100 * 1024 * 1024  # rotate once a file reaches 100 MB
    max_log_files = 10  # keep at most 10 rotated log files

    formatter = logging.Formatter(
        '%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] (%(funcName)s) - %(message)s'
    )
    file_handler = RotatingFileHandler(log_filename, maxBytes=max_log_size, backupCount=max_log_files)
    console_handler = logging.StreamHandler()

    # Apply the same format and the frequency limit to both outputs.
    rate_limit_filter = RateLimitFilter()
    for handler in (file_handler, console_handler):
        handler.setFormatter(formatter)
        handler.addFilter(rate_limit_filter)

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Detach and close earlier handlers rather than just dropping the list,
    # so their underlying files are released.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
|
||||||
|
|
||||||
|
|
||||||
|
# Demo run: log one message repeatedly to exercise the frequency limit.
if __name__ == "__main__":
    setup_logging()

    for _ in range(1000):
        logging.info("测试日志,检测频率限制")
        time.sleep(0.01)  # simulate rapid logging
|
||||||
@ -3,11 +3,14 @@ import json
|
|||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
import config
|
||||||
|
|
||||||
def setup_logging():
|
def setup_logging():
|
||||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||||
|
|
||||||
db_path = "/root/sharedata/shared.db"
|
#db_path = "/root/sharedata/shared.db"
|
||||||
|
# 连接 SQLite 数据库
|
||||||
|
db_path = f"{config.global_share_data_dir}/sqlite/shared.db" # 替换为你的数据库文件
|
||||||
|
|
||||||
def connect_db(db_name=db_path):
|
def connect_db(db_name=db_path):
|
||||||
return sqlite3.connect(db_name)
|
return sqlite3.connect(db_name)
|
||||||
Reference in New Issue
Block a user