import re
from collections import defaultdict
from pathlib import Path

# Matches "<YYYY.MM.DD> <actor names> -": group 1 is the date, group 2 is the
# (comma-separated) actor-name portion that precedes the first " -".
# Compiled once at import time so the per-line loop does not repeat the lookup.
_ENTRY_PATTERN = re.compile(r'(\d{4}\.\d{2}\.\d{2})\s+(.+?)\s+-')


def extract_actors(file_path: str, output_file: str) -> None:
    """Collect the dates each actor appears on and write a per-actor summary.

    Every line of ``file_path`` is searched for a ``YYYY.MM.DD`` date followed
    by a comma-separated list of actor names and a `` -`` separator. Lines
    that do not match are ignored. The result is written to ``output_file``
    as one line per actor, sorted by actor name, in the form
    ``"<actor> | <date>, <date>, ..."`` with the dates sorted ascending and
    de-duplicated.

    Args:
        file_path: Path of the UTF-8 text file to read.
        output_file: Path of the UTF-8 text file to write. Missing parent
            directories are created; an existing file is overwritten.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    actor_dates = defaultdict(set)

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            match = _ENTRY_PATTERN.search(line)
            if not match:
                continue
            date = match.group(1)
            for actor in match.group(2).split(','):
                actor = actor.strip()
                # Skip empty fragments produced by stray or doubled commas.
                if actor:
                    actor_dates[actor].add(date)

    # Create the destination directory up front so a fresh checkout (without
    # the output folder) does not fail with FileNotFoundError.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    # Write the results to the output file.
    with open(output_file, 'w', encoding='utf-8') as f:
        for actor in sorted(actor_dates):
            dates = sorted(actor_dates[actor])
            f.write(f"{actor} | {', '.join(dates)}\n")


# Usage: runs only when executed as a script, so importing this module does
# not touch the filesystem.
if __name__ == "__main__":
    file_path = "./formatted/tushy-list.txt"  # replace with the path to your text file
    output_file = "./result/tushy-actress.txt"  # name of the output file
    extract_actors(file_path, output_file)