modify scripts

This commit is contained in:
2025-12-25 15:38:52 +08:00
parent 483a43c442
commit 28daf55f3f

View File

@ -184,17 +184,21 @@ def generate_union_table(mysql_conn):
for idx, row in enumerate(stash_data):
# 规范化name字段用于匹配
key_str = parse_union_key(row["date"], row["name"], row["code"])
dict_stash_data[key_str] = idx
if key_str not in dict_stash_data:
dict_stash_data[key_str] = set()
dict_stash_data[key_str].add(idx)
stash_data[idx]['matched'] = False # 标记是否已匹配
dict_whisper_data = {}
for idx, row in enumerate(whisper_data):
# 规范化name字段用于匹配
key_str = parse_union_key(row["release_date"], row["studio_name"], row["whisper_code"])
dict_whisper_data[key_str] = idx
if key_str not in dict_whisper_data:
dict_whisper_data[key_str] = set()
dict_whisper_data[key_str].add(idx)
# 4. 内存中进行双边匹配
result = []
dict_result_keys = set()
count_matched = 0
count_whisper_only = 0
count_stash_only = 0
@ -202,11 +206,34 @@ def generate_union_table(mysql_conn):
# 先处理whisper到stash的匹配原left join逻辑
for w in whisper_data:
matched = False
s = None
if w['release_date'] and w['studio_name']:
# 对name进行去空格去特殊字符处理
key_str = parse_union_key(w['release_date'], w['studio_name'], w['whisper_code'])
if key_str in dict_stash_data:
s = stash_data[dict_stash_data[key_str]]
set_idx = dict_stash_data[key_str]
if len(set_idx) == 1:
idx = set_idx.pop()
s = stash_data[idx]
stash_data[idx]['matched'] = True
matched = True
else:
# 多重匹配时把code相同且未匹配过的优先匹配
for idx in set_idx:
if (stash_data[idx]['code'] and w['whisper_code']) and stash_data[idx]['code'].lower() == w['whisper_code'].lower() and not stash_data[idx].get('matched', False):
s = stash_data[idx]
stash_data[idx]['matched'] = True
matched = True
break
if not matched:
# 否则匹配第一个未匹配过的
for idx in set_idx:
if not stash_data[idx].get('matched', False):
s = stash_data[idx]
stash_data[idx]['matched'] = True
matched = True
break
if matched and s:
result.append({
'whisper_id': w['Id'],
'release_year': w['release_year'] or 0,
@ -219,10 +246,8 @@ def generate_union_table(mysql_conn):
'stash_title': s['stash_title'],
'name': s['name']
})
dict_result_keys.add(key_str)
count_matched += 1
matched = True
if not matched:
else:
result.append({
'whisper_id': w['Id'],
'release_year': w['release_year'] or 0,
@ -239,14 +264,9 @@ def generate_union_table(mysql_conn):
# 再处理stash到whisper的反向匹配新增双边匹配逻辑
for s in stash_data:
matched = False
if s['date'] and s['name']:
# 对name进行去空格去特殊字符处理
key_str = parse_union_key(s['date'], s['name'], s['code'])
if key_str in dict_whisper_data:
matched = True
if s['matched']:
continue # 已匹配过的跳过
if not matched:
result.append({
'whisper_id': 0,
'release_year': 0,