diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1515a1b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+# Other existing ignore rules
+*.pyc
+__pycache__/
+
+# Ignore environment configuration files
+.env
+
+# Ignore all log, data and result directories
+**/log/
+**/data/
+**/result/
diff --git a/docker/paperless/plugins/consume.sh b/docker/paperless/plugins/consume.sh
index ab08d6f..8737035 100755
--- a/docker/paperless/plugins/consume.sh
+++ b/docker/paperless/plugins/consume.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 SRC="/volume1/docker/sharedata/stock_data/pdfs"
 DST="/volume1/docker/sharedata/stock_data/em_reports_consume"
-LOG="./paperless.log"
+LOG="./log/paperless.log"
 TARGET_UID=1000
 TARGET_GID=1000
 
@@ -16,6 +16,13 @@ if [ ! -d "$DST" ]; then
     exit 1
 fi
 
+# Ensure the log directory exists (mkdir -p creates missing parents and does not fail if it already exists)
+LOG_DIR=$(dirname "$LOG") # Directory that holds the log file (./log)
+if [ ! -d "$LOG_DIR" ]; then
+    mkdir -p "$LOG_DIR"
+    echo "$(date '+%F %T') [INFO] log目录不存在,已创建: $LOG_DIR" | tee -a "$LOG"
+fi
+
 COUNT=0
 for f in "$SRC"/*.pdf; do
     [ -f "$f" ] || continue
diff --git a/docker/stash/scripts/batch_format_filename.py b/docker/stash/scripts/batch_format_filename.py
new file mode 100644
index 0000000..c6b1ebc
--- /dev/null
+++ b/docker/stash/scripts/batch_format_filename.py
@@ -0,0 +1,267 @@
+import sqlite3
+import os
+import logging
+import json
+from datetime import datetime
+import argparse
+import re
+
+res_dir = './result'
+os.makedirs(res_dir, exist_ok=True)
+
+# Configure logging (file + console)
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler(f'{res_dir}/rename_files.log'),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+
+def preload_folders(conn, prefix):
+    """Preload folder paths into a dict (folder_id -> path), optionally filtered by a path substring."""
+    sqlstr = "SELECT id, path FROM folders where 1=1 "
+    params = []
+    if prefix and prefix.strip():
+        # Bind the filter as a query parameter instead of formatting it into
+        # the SQL text, so quotes in the prefix cannot break or alter the query.
+        sqlstr += " and path like ? "
+        params.append(f"%{prefix}%")
+    try:
+        cursor = conn.cursor()
+        cursor.execute(sqlstr, params)
+        return {row[0]: row[1] for row in cursor.fetchall()}
+    except sqlite3.Error as e:
+        logger.error(f"预加载文件夹信息失败: {str(e)}")
+        raise
+
+def preload_studios(conn):
+    """Preload all studio names into a dict (studio_id -> name)."""
+    try:
+        cursor = conn.cursor()
+        cursor.execute("SELECT id, name FROM studios")
+        studios = {row[0]: row[1] for row in cursor.fetchall()}
+        # Default name stored under the None key (for a missing studio)
+        studios[None] = "UnknownStudio"
+        return studios
+    except sqlite3.Error as e:
+        logger.error(f"预加载工作室信息失败: {str(e)}")
+        raise
+
+def get_performers(conn, scene_id):
+    """Return the scene's performer names, ordered by name and comma-joined ("UnknownPerformers" if none)."""
+    try:
+        cursor = conn.cursor()
+        query = """
+        SELECT p.name
+        FROM performers p
+        JOIN performers_scenes ps ON p.id = ps.performer_id
+        WHERE ps.scene_id = ?
+        ORDER BY p.name
+        """
+        cursor.execute(query, (scene_id,))
+        results = cursor.fetchall()
+        return ','.join([row[0] for row in results]) or "UnknownPerformers"
+    except sqlite3.Error as e:
+        logger.error(f"获取演员信息失败 (scene_id={scene_id}): {str(e)}")
+        raise
+
+def parse_date(date_str):
+    """Normalise a date string to yyyy.mm.dd; return "0000.00.00" when empty or unparseable."""
+    if not date_str:
+        return "0000.00.00"
+
+    date_formats = [
+        "%Y-%m-%d", "%Y/%m/%d", "%d-%m-%Y", "%d/%m/%Y",
+        "%Y%m%d", "%m-%d-%Y", "%m/%d/%Y"
+    ]
+
+    for fmt in date_formats:
+        try:
+            return datetime.strptime(date_str, fmt).strftime("%Y.%m.%d")
+        except ValueError:
+            continue
+
+    logger.warning(f"无法解析日期格式: {date_str},使用默认值")
+    return "0000.00.00"
+
+def get_file_extension(basename):
+    """Return the lower-cased text after the last '.', or '' when there is no dot."""
+    if '.' in basename:
+        return basename.split('.')[-1].lower()
+    return ''
+
+def sanitize_filename(name):
+    """Replace characters that are illegal in file names with '-'."""
+    invalid_chars = '/\\:*?"<>|'
+    for char in invalid_chars:
+        name = name.replace(char, '-')
+    return name
+
+def process_scene_files(conn, mode, prefix):
+    """Build a new name for every scene/file mapping and, in 'run' mode, rename the files on disk."""
+    results = []
+    try:
+        # 1. Preload folders and studios into in-memory dicts (2 SQL queries)
+        folders = preload_folders(conn, prefix)
+        studios = preload_studios(conn)
+        logger.info(f"预加载完成 - 文件夹: {len(folders)} 个, 工作室: {len(studios)} 个")
+
+        # 2. Fetch all related data with one joined query
+        cursor = conn.cursor()
+        query = """
+        SELECT
+            sf.scene_id, sf.file_id,
+            f.id AS file_id, f.basename, f.parent_folder_id,
+            s.title, s.date as release_date, s.studio_id, s.code
+        FROM scenes_files sf
+        LEFT JOIN files f ON sf.file_id = f.id
+        LEFT JOIN scenes s ON sf.scene_id = s.id
+        """
+        cursor.execute(query)
+        mappings = cursor.fetchall()
+        logger.info(f"共找到 {len(mappings)} 条场景-文件映射记录")
+
+        for idx, row in enumerate(mappings, 1):
+            try:
+                # Unpack the joined row
+                scene_id = row[0]
+                file_id = row[1]
+                file_info = {
+                    'id': row[2],
+                    'basename': row[3],
+                    'parent_folder_id': row[4]
+                }
+                scene_info = {
+                    'title': row[5],
+                    'release_date': row[6],
+                    'studio_id': row[7],
+                    'code': row[8]
+                }
+
+                # Skip rows with missing mandatory data
+                if not file_id or not file_info['id'] or not file_info['basename'] or not file_info['parent_folder_id']:
+                    logger.debug(f"文件ID信息不完整 (scene_id={scene_id}, file_id={file_id}),跳过")
+                    continue
+                if not scene_id or not scene_info['title'] or not scene_info['release_date'] or not scene_info['studio_id']:
+                    logger.debug(f"场景信息不完整 (scene_id={scene_id}, file_id={file_id}),跳过")
+                    continue
+
+                # 3. Look up folder path and studio name in the caches (no SQL)
+                folder_path = folders.get(file_info['parent_folder_id'])
+                if not folder_path:
+                    logger.debug(f"文件夹ID不存在 (folder_id={file_info['parent_folder_id']}),跳过")
+                    continue
+                studio_name = studios.get(scene_info['studio_id'])
+                if not studio_name:
+                    logger.debug(f"工作室ID不存在 (studio_id={scene_info['studio_id']}),跳过")
+                    continue
+
+                # 4. Performers still need one query per scene (many-to-many, ordered)
+                performers = get_performers(conn, scene_id)
+
+                # 5. Build the new file name
+                original_basename = file_info['basename'] or "unknown_file"
+                ext = get_file_extension(original_basename)
+                release_date = parse_date(scene_info['release_date'])
+                title = scene_info['title'] or "Untitled"
+
+                # Strip illegal characters
+                sanitized_studio = sanitize_filename(studio_name)
+                sanitized_performers = sanitize_filename(performers)[0:100]  # cap the length
+                sanitized_title = sanitize_filename(title)[0:100]  # cap the length
+                if scene_info.get('code'):
+                    sanitized_title = f"{sanitized_title} ({scene_info['code']})"
+                # Remove whitespace from the studio name
+                sanitized_studio = re.sub(r'\s+', '', sanitized_studio)
+
+                # Assemble the new file name
+                if ext:
+                    new_basename = f"{sanitized_studio}.{release_date} {sanitized_performers} - {sanitized_title}.{ext}"
+                else:
+                    new_basename = f"{sanitized_studio}.{release_date} {sanitized_performers} - {sanitized_title}"
+
+                if len(new_basename) > 254:
+                    logger.warning(f"生成的文件名过长,跳过 (file_id={file_id}): {new_basename}")
+                    continue
+
+                # Build the full paths
+                original_path = os.path.join(folder_path, original_basename)
+                new_path = os.path.join(folder_path, new_basename)
+
+                # Record the planned rename
+                result = {
+                    'file_id': file_id,
+                    'scene_id': scene_id,
+                    'original_name': original_path,
+                    'dest_name': new_path
+                }
+                results.append(result)
+                logger.info(f"处理第 {idx}/{len(mappings)} 条: {original_path} -> {new_path}")
+
+                # Run mode: rename the file on disk (the database update is disabled below)
+                if mode == 'run':
+                    if not os.path.exists(original_path):
+                        logger.warning(f"文件不存在,跳过: {original_path}")
+                        continue
+                    if os.path.exists(new_path):
+                        logger.warning(f"目标文件已存在,跳过: {new_path}")
+                        continue
+                    if original_path != new_path:
+                        os.rename(original_path, new_path)
+                        #cursor.execute(
+                        #    "UPDATE files SET basename = ? WHERE id = ?",
+                        #    (new_basename, file_info['id'])
+                        #)
+                        #conn.commit()
+                        logger.info(f"已更新文件 (file_id={file_info['id']})")
+
+            except Exception as e:
+                logger.error(f"处理记录失败 (scene_id={scene_id}, file_id={file_id}): {str(e)}", exc_info=True)
+                if mode == 'run':
+                    conn.rollback()
+                continue
+
+        # Save the results
+        with open(f'{res_dir}/rename_results.json', 'w', encoding='utf-8') as f:
+            json.dump(results, f, ensure_ascii=False, indent=2)
+        logger.info(f"处理完成,结果已保存到 rename_results.json")
+        return results
+
+    except sqlite3.Error as e:
+        logger.error(f"数据库操作失败: {str(e)}", exc_info=True)
+        if mode == 'run':
+            conn.rollback()
+        raise
+    finally:
+        if mode == 'run':
+            conn.commit()
+
+def main():
+    parser = argparse.ArgumentParser(description='电影文件重命名工具(优化版)')
+    parser.add_argument('--mode', choices=['check', 'run'], default='check',
+                        help='运行模式: check(检查) 或 run(执行)')
+    parser.add_argument('--db', default='movies.db', help='SQLite数据库文件路径')
+    parser.add_argument('--prefix', default='', help='目录前缀,用来过滤文件路径')
+    args = parser.parse_args()
+
+    if not os.path.exists(args.db):
+        logger.error(f"数据库文件不存在: {args.db}")
+        return
+
+    conn = None
+    try:
+        conn = sqlite3.connect(args.db)
+        logger.info(f"成功连接到数据库: {args.db}")
+        process_scene_files(conn, args.mode, args.prefix)
+    except sqlite3.Error as e:
+        logger.error(f"数据库连接失败: {str(e)}", exc_info=True)
+    finally:
+        if conn:
+            conn.close()
+            logger.info("数据库连接已关闭")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/docker/stash/scripts/format_filename.py b/docker/stash/scripts/format_filename.py
new file mode 100644
index 0000000..8ed7c67
--- /dev/null
+++ b/docker/stash/scripts/format_filename.py
@@ -0,0 +1,301 @@
+import sqlite3
+import os
+import logging
+import json
+from datetime import datetime
+import argparse
+import re
+
+# The log file handler below opens ./result/rename_files.log at import time,
+# so the directory must exist before logging is configured.
+os.makedirs('./result', exist_ok=True)
+
+# Configure logging (file + console)
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler('./result/rename_files.log'),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+
+def get_performers(conn, scene_id):
+    """Return the scene's performer names, ordered by name and comma-joined ('' if none)."""
+    try:
+        cursor = conn.cursor()
+        # A single JOIN fetches all performer names for the scene
+        query = """
+        SELECT p.name
+        FROM performers p
+        JOIN performers_scenes ps ON p.id = ps.performer_id
+        WHERE ps.scene_id = ?
+        ORDER BY p.name
+        """
+        cursor.execute(query, (scene_id,))
+        results = cursor.fetchall()
+        return ','.join([row[0] for row in results])
+    except sqlite3.Error as e:
+        logger.error(f"获取演员信息失败 (scene_id={scene_id}): {str(e)}")
+        raise
+
+def get_file_info(conn, file_id):
+    """Return the file's id, basename and parent folder id; raise ValueError if the file is unknown."""
+    try:
+        cursor = conn.cursor()
+        cursor.execute("""
+            SELECT id, basename, parent_folder_id
+            FROM files
+            WHERE id = ?
+        """, (file_id,))
+        result = cursor.fetchone()
+        if not result:
+            raise ValueError(f"未找到文件信息 (file_id={file_id})")
+        return {
+            'id': result[0],
+            'basename': result[1],
+            'parent_folder_id': result[2]
+        }
+    except sqlite3.Error as e:
+        logger.error(f"获取文件信息失败 (file_id={file_id}): {str(e)}")
+        raise
+
+def get_folder_path(conn, folder_id):
+    """Return the folder's path; raise ValueError if the folder is unknown."""
+    try:
+        cursor = conn.cursor()
+        cursor.execute("SELECT path FROM folders WHERE id = ?", (folder_id,))
+        result = cursor.fetchone()
+        if not result:
+            raise ValueError(f"未找到文件夹路径 (folder_id={folder_id})")
+        return result[0]
+    except sqlite3.Error as e:
+        logger.error(f"获取文件夹路径失败 (folder_id={folder_id}): {str(e)}")
+        raise
+
+def get_scene_info(conn, scene_id):
+    """Return the scene's title, release date and studio id; raise ValueError if the scene is unknown."""
+    try:
+        cursor = conn.cursor()
+        cursor.execute("""
+            SELECT title, date as release_date, studio_id
+            FROM scenes
+            WHERE id = ?
+        """, (scene_id,))
+        result = cursor.fetchone()
+        if not result:
+            raise ValueError(f"未找到场景信息 (scene_id={scene_id})")
+        return {
+            'title': result[0],
+            'release_date': result[1],
+            'studio_id': result[2]
+        }
+    except sqlite3.Error as e:
+        logger.error(f"获取场景信息失败 (scene_id={scene_id}): {str(e)}")
+        raise
+
+def get_studio_name(conn, studio_id):
+    """Return the studio's name, or "UnknownStudio" when the id is not found."""
+    try:
+        cursor = conn.cursor()
+        cursor.execute("SELECT name FROM studios WHERE id = ?", (studio_id,))
+        result = cursor.fetchone()
+        if not result:
+            logger.warning(f"未找到工作室信息 (studio_id={studio_id}),使用默认名称")
+            return "UnknownStudio"
+        return result[0]
+    except sqlite3.Error as e:
+        logger.error(f"获取工作室信息失败 (studio_id={studio_id}): {str(e)}")
+        raise
+
+def parse_date(date_str):
+    """Normalise a date string to yyyy.mm.dd; return "0000.00.00" when empty or unparseable."""
+    if not date_str:
+        return "0000.00.00"
+
+    # Try several common date formats in turn
+    date_formats = [
+        "%Y-%m-%d", "%Y/%m/%d", "%d-%m-%Y", "%d/%m/%Y",
+        "%Y%m%d", "%m-%d-%Y", "%m/%d/%Y"
+    ]
+
+    for fmt in date_formats:
+        try:
+            date_obj = datetime.strptime(date_str, fmt)
+            return date_obj.strftime("%Y.%m.%d")
+        except ValueError:
+            continue
+
+    logger.warning(f"无法解析日期格式: {date_str},使用默认值")
+    return "0000.00.00"
+
+def get_file_extension(basename):
+    """Return the lower-cased text after the last '.', or '' when there is no dot."""
+    if '.' in basename:
+        return basename.split('.')[-1].lower()
+    return ''
+
+def sanitize_filename(name):
+    """Replace characters that are illegal in file names with '-'."""
+    invalid_chars = '/\\:*?"<>|'
+    for char in invalid_chars:
+        name = name.replace(char, '-')
+    return name
+
+def process_scene_files(conn, mode, prefix):
+    """Build a new name for every scene/file mapping; in 'run' mode rename the file and update the files table."""
+    results = []
+    try:
+        cursor = conn.cursor()
+        # Fetch every scene/file mapping
+        cursor.execute("SELECT scene_id, file_id FROM scenes_files")
+        mappings = cursor.fetchall()
+        logger.debug(f"共找到 {len(mappings)} 条场景-文件映射记录")
+
+        for idx, (scene_id, file_id) in enumerate(mappings, 1):
+            logger.debug(f"处理第 {idx}/{len(mappings)} 条记录 (scene_id={scene_id}, file_id={file_id})")
+
+            try:
+                # 1. File information
+                file_info = get_file_info(conn, file_id)
+                original_basename = file_info['basename']
+                parent_folder_id = file_info['parent_folder_id']
+
+                # 2. Folder path
+                folder_path = get_folder_path(conn, parent_folder_id)
+                # Honour --prefix (previously accepted but ignored): only handle
+                # files whose folder path contains it (case-sensitive substring).
+                if prefix and prefix.strip() and prefix not in folder_path:
+                    continue
+
+                # 3. Performers; scenes without any are skipped
+                performers = get_performers(conn, scene_id)
+                if not performers:
+                    logger.warning(f"场景 {scene_id} 未找到演员信息,跳过")
+                    continue
+
+                # 4. Scene and studio information
+                scene_info = get_scene_info(conn, scene_id)
+                if not scene_info['title'] or not scene_info['release_date'] or not scene_info['studio_id']:
+                    logger.warning(f"场景 {scene_id} 信息不完整,跳过")
+                    continue
+                title = scene_info['title'] or "Untitled"
+                release_date = parse_date(scene_info['release_date'])
+                studio_name = get_studio_name(conn, scene_info['studio_id'])
+
+                # 5. Build the new file name
+                ext = get_file_extension(original_basename)
+                sanitized_studio = sanitize_filename(studio_name)
+                sanitized_performers = sanitize_filename(performers)[0:100]  # cap the length
+                sanitized_title = sanitize_filename(title)[0:100]  # cap the length
+
+                if ext:
+                    new_basename = f"{sanitized_studio} - {release_date} - {sanitized_performers} - {sanitized_title}.{ext}"
+                else:
+                    new_basename = f"{sanitized_studio} - {release_date} - {sanitized_performers} - {sanitized_title}"
+
+                if len(new_basename) > 254:
+                    logger.warning(f"生成的文件名过长,跳过 (file_id={file_id}): {new_basename}")
+                    continue
+
+                # Build the full paths
+                original_path = os.path.join(folder_path, original_basename)
+                new_path = os.path.join(folder_path, new_basename)
+
+                # Record the planned rename
+                result = {
+                    'file_id': file_id,
+                    'scene_id': scene_id,
+                    'original_name': original_path,
+                    'dest_name': new_path
+                }
+                results.append(result)
+
+                # Report what would be renamed
+                logger.info(f"准备重命名: {original_path} -> {new_path}")
+
+                # Only touch the disk and the database in run mode
+                if mode == 'run':
+                    # Skip files that are missing on disk
+                    if not os.path.exists(original_path):
+                        logger.warning(f"文件不存在,跳过: {original_path}")
+                        continue
+
+                    # Never overwrite an existing file: os.rename silently
+                    # replaces the destination on POSIX.
+                    if original_path != new_path and os.path.exists(new_path):
+                        logger.warning(f"目标文件已存在,跳过: {new_path}")
+                        continue
+
+                    # Rename the file on disk
+                    if original_path != new_path:
+                        os.rename(original_path, new_path)
+                        logger.info(f"已重命名: {original_path} -> {new_path}")
+
+                    # Update the database record
+                    cursor.execute(
+                        "UPDATE files SET basename = ? WHERE id = ?",
+                        (new_basename, file_id)
+                    )
+                    conn.commit()
+                    logger.info(f"已更新数据库记录 (file_id={file_id})")
+
+            except Exception as e:
+                logger.error(f"处理记录失败 (scene_id={scene_id}, file_id={file_id}): {str(e)}", exc_info=True)
+                # Roll back the current transaction (run mode only)
+                if mode == 'run':
+                    conn.rollback()
+                continue
+
+        # Save the results to a file
+        with open('./result/rename_results.json', 'w', encoding='utf-8') as f:
+            json.dump(results, f, ensure_ascii=False, indent=2)
+        logger.info(f"处理完成,结果已保存到 rename_results.json")
+
+        return results
+
+    except sqlite3.Error as e:
+        logger.error(f"数据库操作失败: {str(e)}", exc_info=True)
+        if mode == 'run':
+            conn.rollback()
+        raise
+    finally:
+        if mode == 'run':
+            conn.commit()
+
+def main():
+    # Parse command-line arguments
+    parser = argparse.ArgumentParser(description='电影文件重命名工具')
+    parser.add_argument('--mode', choices=['check', 'run'], default='check',
+                        help='运行模式: check(检查) 或 run(执行)')
+    parser.add_argument('--db', default='movies.db', help='SQLite数据库文件路径')
+    parser.add_argument('--prefix', default='', help='目录的前缀,用来匹配')
+    args = parser.parse_args()
+
+    # Make sure the database file exists
+    if not os.path.exists(args.db):
+        logger.error(f"数据库文件不存在: {args.db}")
+        return
+
+    os.makedirs('./result', exist_ok=True)
+
+    # Connect to the database
+    conn = None
+    try:
+        conn = sqlite3.connect(args.db)
+        conn.row_factory = sqlite3.Row  # row factory that allows access by column name
+        logger.info(f"成功连接到数据库: {args.db}")
+
+        # Run the processing
+        process_scene_files(conn, args.mode, args.prefix)
+
+    except sqlite3.Error as e:
+        logger.error(f"数据库连接失败: {str(e)}", exc_info=True)
+    finally:
+        if conn:
+            conn.close()
+            logger.info("数据库连接已关闭")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/gitignore b/gitignore
new file mode 100644
index 0000000..1515a1b
--- /dev/null
+++ b/gitignore
@@ -0,0 +1,11 @@
+# Other existing ignore rules
+*.pyc
+__pycache__/
+
+# Ignore environment configuration files
+.env
+
+# Ignore all log, data and result directories
+**/log/
+**/data/
+**/result/