31 lines
1.1 KiB
Python
31 lines
1.1 KiB
Python
import re
|
||
from collections import defaultdict
|
||
|
||
def extract_actors(file_path, output_file):
    """Collect the dates on which each actor appears and write them to a file.

    Every line of the input is searched for a date of the form
    ``YYYY.MM.DD`` followed by whitespace, a comma-separated list of actor
    names, whitespace and a ``-``. Lines that do not match are ignored.

    The output holds one line per actor, sorted by name, formatted as
    ``actor | date1, date2, ...`` with the dates de-duplicated and sorted.

    Args:
        file_path: Path of the UTF-8 text file to read.
        output_file: Path of the UTF-8 text file to write. Missing parent
            directories are created.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    import os  # function-scope import keeps this block self-contained

    # Date, then the shortest run of text that is followed by whitespace
    # and "-". Compiled once instead of being looked up for every line.
    entry_pattern = re.compile(r'(\d{4}\.\d{2}\.\d{2})\s+(.+?)\s+-')
    actor_dates = defaultdict(set)

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            match = entry_pattern.search(line)
            if not match:
                continue
            date = match.group(1)
            for actor in match.group(2).split(','):
                actor = actor.strip()
                # Skip empty names produced by stray or trailing commas.
                if actor:
                    actor_dates[actor].add(date)

    # Create the target directory up front so the write does not fail with
    # FileNotFoundError when e.g. "./result" has not been created yet.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Write the result file.
    with open(output_file, 'w', encoding='utf-8') as f:
        for actor in sorted(actor_dates):
            # Zero-padded YYYY.MM.DD strings sort chronologically as text.
            dates = sorted(actor_dates[actor])
            f.write(f"{actor} | {', '.join(dates)}\n")
|
||
|
||
# Usage: adjust the two paths below, then run this file.
file_path = "./input_files/vixen-all.txt"  # replace with the path to your text file
output_file = "./result/vixen-actress.txt"  # name of the file the results are written to
extract_actors(file_path=file_path, output_file=output_file)
|
||
|
||
|