This repository has been archived on 2026-01-07. You can view files and clone it, but cannot push or open issues or pull requests.
Files
resources/src/tools/sync_filename.py
2025-12-29 16:56:16 +08:00

200 lines
8.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import sqlite3
import pymysql
from pymysql.cursors import DictCursor
import os
import argparse
from datetime import datetime
import logging
# Configure the root logger at INFO level for the whole script.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
def preload_folders(conn, prefix):
    """Preload folder paths into a dict mapping folder_id -> path.

    Args:
        conn: Open sqlite3 connection to a database that has a ``folders``
            table with ``id`` and ``path`` columns.
        prefix: Optional filter text. When it is non-blank, only folders whose
            path contains it (SQL ``LIKE '%prefix%'``) are returned; when it is
            empty, blank or None, every folder is returned.

    Returns:
        dict mapping each folder id to its path.

    Raises:
        sqlite3.Error: re-raised after logging when the query fails.
    """
    sqlstr = "SELECT id, path FROM folders where 1=1 "
    params = ()
    if prefix and prefix.strip():
        # Bind the filter as a parameter instead of interpolating it into the
        # SQL text, so quotes in the prefix can neither break nor alter the
        # statement. LIKE wildcards inside the prefix keep their meaning.
        sqlstr += " and path like ? "
        params = (f"%{prefix}%",)
    try:
        cursor = conn.cursor()
        cursor.execute(sqlstr, params)
        return {row[0]: row[1] for row in cursor.fetchall()}
    except sqlite3.Error as e:
        logger.error(f"预加载文件夹信息失败: {str(e)}")
        raise
def preload_studios(conn):
    """Preload studio names into a dict mapping studio_id -> name.

    Args:
        conn: Open sqlite3 connection to a database that has a ``studios``
            table with ``id`` and ``name`` columns.

    Returns:
        dict mapping each studio id to its name, plus a ``None`` key mapped to
        ``"UnknownStudio"``.

    Raises:
        sqlite3.Error: re-raised after logging when the query fails.
    """
    try:
        cur = conn.cursor()
        cur.execute("SELECT id, name FROM studios")
        name_by_id = {}
        for record in cur.fetchall():
            name_by_id[record[0]] = record[1]
    except sqlite3.Error as e:
        logger.error(f"预加载工作室信息失败: {str(e)}")
        raise
    # Default entry for scenes whose studio id is missing.
    name_by_id[None] = "UnknownStudio"
    return name_by_id
def _basename_has_exact_code(basename, basename_lower, code_text, code_lower):
    """Return True when the first hit of the code in basename is a whole token.

    The code is searched case-insensitively. Starting at the first occurrence,
    the match is extended forward over any directly following alphanumeric
    characters; the match is accepted only if that extended text still equals
    the code (i.e. the code is not merely the beginning of a longer token).
    """
    start_index = basename_lower.find(code_lower)
    if start_index < 0:
        return False
    end_index = start_index + len(code_text)
    # Extend forward until a non-alphanumeric character is reached.
    while end_index < len(basename) and basename[end_index].isalnum():
        end_index += 1
    return basename[start_index:end_index].lower() == code_lower


# Sync metadata from the whisper database into the stash database.
def sync_whisper_to_stash(whisper_db_path, stash_db_path, dir_prefix, studio_filter, run_mode="check"):
    """Copy code/title/date from whisper episodes onto matching stash scenes.

    Episodes are read from the whisper database (filtered by series title),
    then every scene/file mapping in the stash database is examined. A scene
    is a candidate only when it has neither a code nor a studio id and its
    file's parent folder is among the folders selected by ``dir_prefix``. The
    scene is matched to the first episode whose ExternalId appears in the file
    basename as a complete alphanumeric token.

    Args:
        whisper_db_path: Path of the whisper SQLite database file.
        stash_db_path: Path of the stash SQLite database file.
        dir_prefix: Folder path filter passed to ``preload_folders``.
        studio_filter: Text that the series title must contain
            (SQL ``LIKE '%studio_filter%'``).
        run_mode: ``"check"`` only logs the matches; any other value updates
            the ``scenes`` table and commits after each updated scene.

    Raises:
        sqlite3.Error: re-raised after logging when reading either database
            fails. A failed UPDATE of a single scene is logged and skipped.
    """
    sqlite_conn = None
    sqlite_conn_stash = None
    try:
        # Read the candidate episodes from the whisper database.
        try:
            sqlite_conn = sqlite3.connect(whisper_db_path)
            sqlite_conn.row_factory = sqlite3.Row
            cursor_sqlite = sqlite_conn.cursor()
            # The filter is bound as a parameter rather than formatted into
            # the SQL text, so quotes in it cannot break or alter the query.
            sql_str = """
                SELECT episodes.Id as Id, episodes.Title as Title, episodes.AirDate as AirDate, episodes.ExternalId as whisper_code
                from episodes
                left join series on episodes.SeriesId = series.Id
                where series.Title like ?
            """
            cursor_sqlite.execute(sql_str, (f"%{studio_filter}%",))
            whisper_rows = cursor_sqlite.fetchall()
        except sqlite3.Error as e:
            logger.error(f"读取Whisper数据库失败: {str(e)}")
            raise
        logger.info(f"从Whisper数据库读取到 {len(whisper_rows)} 条记录")

        # Index by code; a later row with the same code replaces an earlier one.
        idx_whisper = {}
        for row in whisper_rows:
            idx_whisper[row["whisper_code"]] = row
        # Normalise every usable code once instead of once per scene file.
        candidates = [
            (str(code), str(code).lower(), row)
            for code, row in idx_whisper.items()
            if code
        ]

        # Connect to the stash database and preload its data.
        try:
            sqlite_conn_stash = sqlite3.connect(stash_db_path)
            sqlite_conn_stash.row_factory = sqlite3.Row
            # 1. Preload folders and studios into in-memory dicts (2 queries).
            folders = preload_folders(sqlite_conn_stash, dir_prefix)
            studios = preload_studios(sqlite_conn_stash)
            logger.info(f"预加载完成 - 文件夹: {len(folders)} 个, 工作室: {len(studios)}")
            # 2. Fetch every scene/file association with a single query.
            cursor = sqlite_conn_stash.cursor()
            query = """
                SELECT
                    sf.scene_id, sf.file_id,
                    f.id AS file_id, f.basename, f.parent_folder_id,
                    s.title, s.date as release_date, s.studio_id as studio_id, s.code as scene_code
                FROM scenes_files sf
                LEFT JOIN files f ON sf.file_id = f.id
                LEFT JOIN scenes s ON sf.scene_id = s.id
            """
            cursor.execute(query)
            mappings = cursor.fetchall()
            logger.info(f"共找到 {len(mappings)} 条场景-文件映射记录")
        except sqlite3.Error as e:
            logger.error(f"预加载关联数据失败: {str(e)}")
            raise

        matched_cnt = 0
        # Process the scene/file mappings one by one.
        for mapping in mappings:
            scene_id = mapping['scene_id']
            if mapping['scene_code'] or mapping['studio_id']:
                # Already has a code or a studio id: leave it alone.
                continue
            folder_path = folders.get(mapping['parent_folder_id'])
            if not folder_path:
                logger.debug(f"文件夹ID不存在 (folder_id={mapping['parent_folder_id']}),跳过")
                continue
            basename = mapping['basename']

            # Find the first whisper episode whose code is a whole token of
            # the basename.
            whisper_row = None
            if basename:
                basename_lower = basename.lower()
                for code_text, code_lower, candidate_row in candidates:
                    if _basename_has_exact_code(basename, basename_lower, code_text, code_lower):
                        whisper_row = candidate_row
                        break
            if whisper_row is None:
                logger.info(f"未找到匹配的Whisper记录跳过场景ID {scene_id}, filename {basename}")
                continue

            matched_cnt += 1
            if run_mode == "check":
                logger.info(f"[检查模式] 找到匹配的Whisper记录场景ID {scene_id}, filename {basename}code={whisper_row['whisper_code']}, title={whisper_row['Title']}, date={whisper_row['AirDate']}")
                continue

            # Apply the update to the stash database.
            new_code = whisper_row['whisper_code']
            new_title = whisper_row['Title']
            new_date = whisper_row['AirDate']
            try:
                update_cursor = sqlite_conn_stash.cursor()
                update_cursor.execute("""
                    UPDATE scenes
                    SET code = ?, title = ?, date = ?
                    WHERE id = ?
                """, (new_code, new_title, new_date, scene_id))
                sqlite_conn_stash.commit()
                logger.info(f"更新场景ID {scene_id}, filename {basename}code={new_code}, title={new_title}, date={new_date}")
            except sqlite3.Error as e:
                # A failure on one scene must not abort the remaining ones.
                logger.error(f"更新场景ID {scene_id}, filename {basename} 失败: {str(e)}")
        logger.info(f"处理完成,共匹配并更新 {matched_cnt} 条记录。")
    finally:
        # Close whichever connections were opened, also on the error paths.
        for open_conn in (sqlite_conn_stash, sqlite_conn):
            if open_conn is not None:
                open_conn.close()
# SQLite database file locations for each supported environment name.
sqlite_config = dict(
    dev=dict(
        shared_db_path="/root/sharedata/sqlite/shared.db",
        whisparr_db_path="/root/sharedata/sqlite/whisparr2.db",
        stash_db_path="/root/sharedata/sqlite/stash-go.sqlite",
    ),
    nas=dict(
        shared_db_path="/root/sharedata/sqlite/shared.db",
        whisparr_db_path="/root/sharedata/sqlite/whisper_db/whisparr2.db",
        stash_db_path="/root/sharedata/sqlite/stash_db/stash-go.sqlite",
    ),
)
if __name__ == "__main__":
    # Command-line entry point: parse the options, pick the database paths for
    # the requested environment and run the whisper -> stash sync.
    # Imported here because only the script entry point needs it; sys.exit is
    # used instead of the exit() helper, which exists only when the site
    # module has been loaded.
    import sys

    parser = argparse.ArgumentParser(
        description='update filename from whisper to stash.\n\n',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("--dir_prefix", type=str, help="file directory prefix", default="")
    parser.add_argument("--studio", type=str, help="studio name filter", default="")
    parser.add_argument("--env", type=str, help="execute enviroment", default="dev")
    parser.add_argument('--mode', choices=['check', 'run'], default='check',
                        help='运行模式: check(检查) 或 run(执行)')
    args = parser.parse_args()

    # Resolve the environment configuration.
    current_env = args.env
    if current_env not in sqlite_config:
        # Fall back to the dev configuration, but say so: in run mode this
        # decides which database gets modified.
        logger.warning(f"未知的 env '{args.env}',使用 dev 环境配置")
        current_env = 'dev'

    # Both filters are mandatory; refuse to run without them.
    if not args.dir_prefix:
        print("未指定 dir_prefix程序退出")
        sys.exit(1)
    if not args.studio:
        print("未指定 studio程序退出")
        sys.exit(1)

    sync_whisper_to_stash(
        sqlite_config[current_env]['whisparr_db_path'],
        sqlite_config[current_env]['stash_db_path'],
        args.dir_prefix,
        args.studio,
        args.mode
    )
    sys.exit(0)