modify scripts

This commit is contained in:
2025-12-29 09:44:32 +08:00
parent 9e3066317d
commit 2dd3d44419

188
src/tools/sync_filename.py Normal file
View File

@ -0,0 +1,188 @@
import sqlite3
import pymysql
from pymysql.cursors import DictCursor
import os
import argparse
from datetime import datetime
import logging
# Configure the root logger at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def preload_folders(conn, prefix):
    """Preload folder paths into a dict mapping folder id -> path.

    Args:
        conn: Open sqlite3 connection to a database that has a ``folders``
            table with ``id`` and ``path`` columns.
        prefix: Optional filter text. When non-blank, only folders whose
            path contains this text are returned. It is matched with
            ``LIKE '%<prefix>%'``, so despite the name this is a substring
            match, and ``%`` / ``_`` inside it act as LIKE wildcards.
            ``None`` or a blank string loads every folder.

    Returns:
        A dict mapping each folder id to its path.

    Raises:
        sqlite3.Error: If the query fails; the error is logged and re-raised.
    """
    sqlstr = "SELECT id, path FROM folders where 1=1 "
    params = ()
    if prefix and prefix.strip():
        # Bind the filter as a parameter instead of formatting it into the
        # SQL text: a quote in the value can then neither break the
        # statement nor inject SQL.
        sqlstr += " and path like ? "
        params = (f"%{prefix}%",)
    try:
        cursor = conn.cursor()
        cursor.execute(sqlstr, params)
        return {row[0]: row[1] for row in cursor.fetchall()}
    except sqlite3.Error as e:
        logger.error(f"预加载文件夹信息失败: {str(e)}")
        raise
def preload_studios(conn):
    """Load every studio name into a dict keyed by studio id.

    Args:
        conn: Open sqlite3 connection to a database that has a ``studios``
            table with ``id`` and ``name`` columns.

    Returns:
        A dict mapping studio id -> name, plus a ``None`` key mapped to
        ``"UnknownStudio"``.

    Raises:
        sqlite3.Error: If the query fails; the error is logged and re-raised.
    """
    try:
        cur = conn.cursor()
        cur.execute("SELECT id, name FROM studios")
        name_by_id = {}
        for record in cur.fetchall():
            name_by_id[record[0]] = record[1]
    except sqlite3.Error as e:
        logger.error(f"预加载工作室信息失败: {str(e)}")
        raise
    # Default entry under the None key (for studios that are not found).
    name_by_id[None] = "UnknownStudio"
    return name_by_id
# Sync from whisper to stash
def sync_whisper_to_stash(whisper_db_path, stash_db_path, dir_prefix, studio_filter, run_mode="check"):
    """Copy code/title/date from Whisper episodes onto matching Stash scenes.

    Episodes are read from the Whisper database (restricted to series whose
    title contains ``studio_filter``) and indexed by their ``ExternalId``.
    Every scene-file mapping in the Stash database is then examined: a scene
    that has neither ``code`` nor ``studio_id``, and whose file lives in one
    of the preloaded folders, is matched against the first indexed external
    id that occurs as a substring of the file's basename.

    Args:
        whisper_db_path: Path of the Whisper sqlite database (read only here).
        stash_db_path: Path of the Stash sqlite database (updated in run mode).
        dir_prefix: Folder filter forwarded to ``preload_folders``; mappings
            whose parent folder is not in the preloaded set are skipped.
        studio_filter: Text matched against ``series.Title`` with
            ``LIKE '%<studio_filter>%'``.
        run_mode: ``"check"`` only logs the matches; any other value updates
            ``scenes.code``, ``scenes.title`` and ``scenes.date`` and commits
            after each updated scene.

    Raises:
        sqlite3.Error: If reading the Whisper database or preloading the
            Stash data fails (logged, then re-raised). A failure while
            updating a single scene is logged and that scene is skipped.
    """
    sqlite_conn = None
    sqlite_conn_stash = None
    try:
        # Read the candidate episodes from the whisper database.
        try:
            sqlite_conn = sqlite3.connect(whisper_db_path)
            sqlite_conn.row_factory = sqlite3.Row
            cursor_sqlite = sqlite_conn.cursor()
            # The filter is bound as a parameter so quotes in it can neither
            # break the statement nor inject SQL.
            sql_str = """
                SELECT episodes.Id as Id, episodes.Title as Title, episodes.AirDate as AirDate, episodes.ExternalId as whisper_code
                from episodes
                left join series on episodes.SeriesId = series.Id
                where series.Title like ?
            """
            cursor_sqlite.execute(sql_str, (f"%{studio_filter}%",))
            whisper_rows = cursor_sqlite.fetchall()
        except sqlite3.Error as e:
            logger.error(f"读取Whisper数据库失败: {str(e)}")
            raise
        logger.info(f"从Whisper数据库读取到 {len(whisper_rows)} 条记录")

        # Index by external id; rows sharing an id collapse to the last one.
        idx_whisper = {}
        for row in whisper_rows:
            idx_whisper[row["whisper_code"]] = row

        # Connect to the stash database and preload the data.
        try:
            sqlite_conn_stash = sqlite3.connect(stash_db_path)
            sqlite_conn_stash.row_factory = sqlite3.Row
            # 1. Preload folders and studios into in-memory dicts (2 queries).
            folders = preload_folders(sqlite_conn_stash, dir_prefix)
            studios = preload_studios(sqlite_conn_stash)
            logger.info(f"预加载完成 - 文件夹: {len(folders)} 个, 工作室: {len(studios)}")
            # 2. Fetch all scene/file associations with one query.
            cursor = sqlite_conn_stash.cursor()
            query = """
                SELECT
                sf.scene_id, sf.file_id,
                f.id AS file_id, f.basename, f.parent_folder_id,
                s.title, s.date as release_date, s.studio_id as studio_id, s.code as scene_code
                FROM scenes_files sf
                LEFT JOIN files f ON sf.file_id = f.id
                LEFT JOIN scenes s ON sf.scene_id = s.id
            """
            cursor.execute(query)
            mappings = cursor.fetchall()
            logger.info(f"共找到 {len(mappings)} 条场景-文件映射记录")
        except sqlite3.Error as e:
            logger.error(f"预加载关联数据失败: {str(e)}")
            raise

        matched_cnt = 0
        # Process the mappings one by one.
        for mapping in mappings:
            scene_id = mapping['scene_id']
            code = mapping['scene_code']
            studio_id = mapping['studio_id']
            if code or studio_id:
                # The scene already has a code or a studio_id: skip it.
                continue
            folder_path = folders.get(mapping['parent_folder_id'])
            if not folder_path:
                logger.debug(f"文件夹ID不存在 (folder_id={mapping['parent_folder_id']}),跳过")
                continue
            basename = mapping['basename']
            # Look for a whisper record whose code occurs in the file name.
            matched = False
            for whisper_code, whisper_row in idx_whisper.items():
                if whisper_code and basename and str(whisper_code) in basename:
                    # Only the first matching record is used.
                    matched = True
                    matched_cnt += 1
                    if run_mode == "check":
                        logger.info(f"[检查模式] 找到匹配的Whisper记录场景ID {scene_id}, filename {basename}code={whisper_row['whisper_code']}, title={whisper_row['Title']}, date={whisper_row['AirDate']}")
                        break
                    # Apply the update.
                    new_code = whisper_row['whisper_code']
                    new_title = whisper_row['Title']
                    new_date = whisper_row['AirDate']
                    try:
                        update_cursor = sqlite_conn_stash.cursor()
                        update_cursor.execute("""
                            UPDATE scenes
                            SET code = ?, title = ?, date = ?
                            WHERE id = ?
                        """, (new_code, new_title, new_date, scene_id))
                        sqlite_conn_stash.commit()
                        logger.info(f"更新场景ID {scene_id}, filename {basename}code={new_code}, title={new_title}, date={new_date}")
                    except sqlite3.Error as e:
                        logger.error(f"更新场景ID {scene_id}, filename {basename} 失败: {str(e)}")
                    break  # Leave the inner loop and move on to the next mapping.
            if not matched:
                logger.info(f"未找到匹配的Whisper记录跳过场景ID {scene_id}, filename {basename}")
        logger.info(f"处理完成,共匹配并更新 {matched_cnt} 条记录。")
    finally:
        # Close the database connections even when an error propagates.
        if sqlite_conn_stash is not None:
            sqlite_conn_stash.close()
        if sqlite_conn is not None:
            sqlite_conn.close()
# Per-environment locations of the sqlite databases, keyed by environment name.
sqlite_config = dict(
    dev=dict(
        shared_db_path="/root/sharedata/sqlite/shared.db",
        whisparr_db_path="/root/sharedata/sqlite/whisparr2.db",
        stash_db_path="/root/sharedata/sqlite/stash-go.sqlite",
    ),
    nas=dict(
        shared_db_path="/root/sharedata/sqlite/shared.db",
        whisparr_db_path="/root/sharedata/sqlite/whisper_db/whisparr2.db",
        stash_db_path="/root/sharedata/sqlite/stash_db/stash-go.sqlite",
    ),
)
if __name__ == "__main__":
    # Script entry point: parse the command line, pick the database paths for
    # the requested environment and run the sync.
    # sys is imported here because only the entry point needs it. sys.exit is
    # used instead of the site-provided exit(), which is meant for the
    # interactive shell and is missing when Python runs with -S.
    import sys

    parser = argparse.ArgumentParser(
        description='update filename from whisper to stash.\n\n',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("--dir_prefix", type=str, help="file directory prefix", default="")
    parser.add_argument("--studio", type=str, help="studio name filter", default="")
    parser.add_argument("--env", type=str, help="execute enviroment", default="dev")
    parser.add_argument('--mode', choices=['check', 'run'], default='check',
                        help='运行模式: check(检查) 或 run(执行)')
    args = parser.parse_args()

    # Resolve the environment configuration.
    current_env = args.env
    if current_env not in sqlite_config:
        # Fall back to the development configuration, but say so: a mistyped
        # --env would otherwise silently run against the dev database paths.
        logger.warning("unknown env %r, falling back to 'dev'", current_env)
        current_env = 'dev'

    # Both filters are mandatory; refuse to run without them.
    if not args.dir_prefix:
        print("未指定 dir_prefix程序退出")
        sys.exit(1)
    if not args.studio:
        print("未指定 studio程序退出")
        sys.exit(1)

    sync_whisper_to_stash(
        sqlite_config[current_env]['whisparr_db_path'],
        sqlite_config[current_env]['stash_db_path'],
        args.dir_prefix,
        args.studio,
        args.mode
    )
    sys.exit(0)