Modify scripts: handle duplicate match keys in generate_union_table
This commit is contained in:
@@ -184,17 +184,21 @@ def generate_union_table(mysql_conn):
|
||||
for idx, row in enumerate(stash_data):
|
||||
# 规范化name字段用于匹配
|
||||
key_str = parse_union_key(row["date"], row["name"], row["code"])
|
||||
dict_stash_data[key_str] = idx
|
||||
if key_str not in dict_stash_data:
|
||||
dict_stash_data[key_str] = set()
|
||||
dict_stash_data[key_str].add(idx)
|
||||
stash_data[idx]['matched'] = False # 标记是否已匹配
|
||||
|
||||
dict_whisper_data = {}
|
||||
for idx, row in enumerate(whisper_data):
|
||||
# 规范化name字段用于匹配
|
||||
key_str = parse_union_key(row["release_date"], row["studio_name"], row["whisper_code"])
|
||||
dict_whisper_data[key_str] = idx
|
||||
|
||||
if key_str not in dict_whisper_data:
|
||||
dict_whisper_data[key_str] = set()
|
||||
dict_whisper_data[key_str].add(idx)
|
||||
|
||||
# 4. 内存中进行双边匹配
|
||||
result = []
|
||||
dict_result_keys = set()
|
||||
count_matched = 0
|
||||
count_whisper_only = 0
|
||||
count_stash_only = 0
|
||||
@@ -202,27 +206,48 @@ def generate_union_table(mysql_conn):
|
||||
# 先处理whisper到stash的匹配(原left join逻辑)
|
||||
for w in whisper_data:
|
||||
matched = False
|
||||
s = None
|
||||
if w['release_date'] and w['studio_name']:
|
||||
# 对name进行去空格,去特殊字符处理
|
||||
key_str = parse_union_key(w['release_date'], w['studio_name'], w['whisper_code'])
|
||||
if key_str in dict_stash_data:
|
||||
s = stash_data[dict_stash_data[key_str]]
|
||||
result.append({
|
||||
'whisper_id': w['Id'],
|
||||
'release_year': w['release_year'] or 0,
|
||||
'release_date': w['release_date'],
|
||||
'whisper_code': w['whisper_code'],
|
||||
'title': w['title'],
|
||||
'studio_name': w['studio_name'],
|
||||
'date': s['date'],
|
||||
'code': s['code'],
|
||||
'stash_title': s['stash_title'],
|
||||
'name': s['name']
|
||||
})
|
||||
dict_result_keys.add(key_str)
|
||||
count_matched += 1
|
||||
matched = True
|
||||
if not matched:
|
||||
set_idx = dict_stash_data[key_str]
|
||||
if len(set_idx) == 1:
|
||||
idx = set_idx.pop()
|
||||
s = stash_data[idx]
|
||||
stash_data[idx]['matched'] = True
|
||||
matched = True
|
||||
else:
|
||||
# 多重匹配时,把code相同且未匹配过的优先匹配
|
||||
for idx in set_idx:
|
||||
if (stash_data[idx]['code'] and w['whisper_code']) and stash_data[idx]['code'].lower() == w['whisper_code'].lower() and not stash_data[idx].get('matched', False):
|
||||
s = stash_data[idx]
|
||||
stash_data[idx]['matched'] = True
|
||||
matched = True
|
||||
break
|
||||
if not matched:
|
||||
# 否则匹配第一个未匹配过的
|
||||
for idx in set_idx:
|
||||
if not stash_data[idx].get('matched', False):
|
||||
s = stash_data[idx]
|
||||
stash_data[idx]['matched'] = True
|
||||
matched = True
|
||||
break
|
||||
if matched and s:
|
||||
result.append({
|
||||
'whisper_id': w['Id'],
|
||||
'release_year': w['release_year'] or 0,
|
||||
'release_date': w['release_date'],
|
||||
'whisper_code': w['whisper_code'],
|
||||
'title': w['title'],
|
||||
'studio_name': w['studio_name'],
|
||||
'date': s['date'],
|
||||
'code': s['code'],
|
||||
'stash_title': s['stash_title'],
|
||||
'name': s['name']
|
||||
})
|
||||
count_matched += 1
|
||||
else:
|
||||
result.append({
|
||||
'whisper_id': w['Id'],
|
||||
'release_year': w['release_year'] or 0,
|
||||
@@ -239,27 +264,22 @@ def generate_union_table(mysql_conn):
|
||||
|
||||
# 再处理stash到whisper的反向匹配(新增双边匹配逻辑)
|
||||
for s in stash_data:
|
||||
matched = False
|
||||
if s['date'] and s['name']:
|
||||
# 对name进行去空格,去特殊字符处理
|
||||
key_str = parse_union_key(s['date'], s['name'], s['code'])
|
||||
if key_str in dict_whisper_data:
|
||||
matched = True
|
||||
|
||||
if not matched:
|
||||
result.append({
|
||||
'whisper_id': 0,
|
||||
'release_year': 0,
|
||||
'release_date': None,
|
||||
'whisper_code': None,
|
||||
'title': None,
|
||||
'studio_name': None,
|
||||
'date': s['date'],
|
||||
'code': s['code'],
|
||||
'stash_title': s['stash_title'],
|
||||
'name': s['name']
|
||||
})
|
||||
count_stash_only += 1
|
||||
if s['matched']:
|
||||
continue # 已匹配过的跳过
|
||||
|
||||
result.append({
|
||||
'whisper_id': 0,
|
||||
'release_year': 0,
|
||||
'release_date': None,
|
||||
'whisper_code': None,
|
||||
'title': None,
|
||||
'studio_name': None,
|
||||
'date': s['date'],
|
||||
'code': s['code'],
|
||||
'stash_title': s['stash_title'],
|
||||
'name': s['name']
|
||||
})
|
||||
count_stash_only += 1
|
||||
|
||||
print(f"匹配完成:匹配成功 {count_matched} 条,Whisper 独有 {count_whisper_only} 条,Stash 独有 {count_stash_only} 条")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user