diff --git a/src/tools/sync_db.py b/src/tools/sync_db.py index c190d2b..f310c87 100644 --- a/src/tools/sync_db.py +++ b/src/tools/sync_db.py @@ -184,17 +184,21 @@ def generate_union_table(mysql_conn): for idx, row in enumerate(stash_data): # 规范化name字段用于匹配 key_str = parse_union_key(row["date"], row["name"], row["code"]) - dict_stash_data[key_str] = idx + if key_str not in dict_stash_data: + dict_stash_data[key_str] = set() + dict_stash_data[key_str].add(idx) + stash_data[idx]['matched'] = False # 标记是否已匹配 dict_whisper_data = {} for idx, row in enumerate(whisper_data): # 规范化name字段用于匹配 key_str = parse_union_key(row["release_date"], row["studio_name"], row["whisper_code"]) - dict_whisper_data[key_str] = idx - + if key_str not in dict_whisper_data: + dict_whisper_data[key_str] = set() + dict_whisper_data[key_str].add(idx) + # 4. 内存中进行双边匹配 result = [] - dict_result_keys = set() count_matched = 0 count_whisper_only = 0 count_stash_only = 0 @@ -202,27 +206,48 @@ def generate_union_table(mysql_conn): # 先处理whisper到stash的匹配(原left join逻辑) for w in whisper_data: matched = False + s = None if w['release_date'] and w['studio_name']: # 对name进行去空格,去特殊字符处理 key_str = parse_union_key(w['release_date'], w['studio_name'], w['whisper_code']) if key_str in dict_stash_data: - s = stash_data[dict_stash_data[key_str]] - result.append({ - 'whisper_id': w['Id'], - 'release_year': w['release_year'] or 0, - 'release_date': w['release_date'], - 'whisper_code': w['whisper_code'], - 'title': w['title'], - 'studio_name': w['studio_name'], - 'date': s['date'], - 'code': s['code'], - 'stash_title': s['stash_title'], - 'name': s['name'] - }) - dict_result_keys.add(key_str) - count_matched += 1 - matched = True - if not matched: + set_idx = dict_stash_data[key_str] + if len(set_idx) == 1: + idx = set_idx.pop() + s = stash_data[idx] + stash_data[idx]['matched'] = True + matched = True + else: + # 多重匹配时,把code相同且未匹配过的优先匹配 + for idx in set_idx: + if (stash_data[idx]['code'] and w['whisper_code']) and stash_data[idx]['code'].lower() == w['whisper_code'].lower() and not stash_data[idx].get('matched', False): + s = stash_data[idx] + stash_data[idx]['matched'] = True + matched = True + break + if not matched: + # 否则匹配第一个未匹配过的 + for idx in set_idx: + if not stash_data[idx].get('matched', False): + s = stash_data[idx] + stash_data[idx]['matched'] = True + matched = True + break + if matched and s: + result.append({ + 'whisper_id': w['Id'], + 'release_year': w['release_year'] or 0, + 'release_date': w['release_date'], + 'whisper_code': w['whisper_code'], + 'title': w['title'], + 'studio_name': w['studio_name'], + 'date': s['date'], + 'code': s['code'], + 'stash_title': s['stash_title'], + 'name': s['name'] + }) + count_matched += 1 + else: result.append({ 'whisper_id': w['Id'], 'release_year': w['release_year'] or 0, @@ -239,27 +264,22 @@ def generate_union_table(mysql_conn): # 再处理stash到whisper的反向匹配(新增双边匹配逻辑) for s in stash_data: - matched = False - if s['date'] and s['name']: - # 对name进行去空格,去特殊字符处理 - key_str = parse_union_key(s['date'], s['name'], s['code']) - if key_str in dict_whisper_data: - matched = True - - if not matched: - result.append({ - 'whisper_id': 0, - 'release_year': 0, - 'release_date': None, - 'whisper_code': None, - 'title': None, - 'studio_name': None, - 'date': s['date'], - 'code': s['code'], - 'stash_title': s['stash_title'], - 'name': s['name'] - }) - count_stash_only += 1 + if s['matched']: + continue # 已匹配过的跳过 + + result.append({ + 'whisper_id': 0, + 'release_year': 0, + 'release_date': None, + 'whisper_code': None, + 'title': None, + 'studio_name': None, + 'date': s['date'], + 'code': s['code'], + 'stash_title': s['stash_title'], + 'name': s['name'] + }) + count_stash_only += 1 print(f"匹配完成:匹配成功 {count_matched} 条,Whisper 独有 {count_whisper_only} 条,Stash 独有 {count_stash_only} 条")