Initial commit

This commit is contained in:
2024-10-03 11:13:19 +08:00
commit 7fc57ff25d
31 changed files with 11224 additions and 0 deletions

View File

@ -0,0 +1,30 @@
import re
from collections import defaultdict
def extract_actors(file_path, output_file):
    """Collect the dates on which each actor appears and write a summary file.

    Every line of ``file_path`` is searched for a ``YYYY.MM.DD`` date followed
    by whitespace, a comma-separated list of actor names, whitespace and a
    ``-``.  Lines that do not match are skipped.  Each name is stripped of
    surrounding whitespace, empty names are dropped, and the date is recorded
    for that name (duplicate dates collapse because a set is used).

    The result is written to ``output_file`` as UTF-8, one line per actor in
    sorted name order: ``<actor> <date>, <date>, ...`` with dates sorted
    ascending as strings.

    Args:
        file_path: Path of the UTF-8 text file to read.
        output_file: Path of the file to write; its parent directory is
            created if it does not exist yet.

    Returns:
        None.
    """
    # Local import so this function stays self-contained with respect to the
    # file's existing top-level imports.
    import os

    # Compile once instead of going through re.search on every line.
    pattern = re.compile(r'(\d{4}\.\d{2}\.\d{2})\s+(.+?)\s+-')

    actor_dates = defaultdict(set)
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Match the date and the actor-name portion of the line.
            match = pattern.search(line)
            if not match:
                continue
            date = match.group(1)
            for actor in match.group(2).split(','):
                actor = actor.strip()
                if actor:
                    actor_dates[actor].add(date)

    # Make sure the destination directory exists so the write below does not
    # fail with FileNotFoundError after all the parsing work is done.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Write the results to the output file.
    with open(output_file, 'w', encoding='utf-8') as f:
        for actor in sorted(actor_dates):
            dates = sorted(actor_dates[actor])
            f.write(f"{actor} {', '.join(dates)}\n")
# Usage: point these at the input listing and the desired summary file.
file_path = "./formatted/blacked-list.txt"  # replace with the path to your text file
output_file = "./result/blacked-actress.txt"  # file name the results are written to
extract_actors(file_path=file_path, output_file=output_file)