import re

# Performer names that occupy three dot-separated segments of a file name.
# Entries are stored lowercase; lookups lowercase the candidate first so that
# capitalized file names ("Anna.De.Ville") match as well.
SPECIAL_CASES_THREE = {
    "alex.h.banks",
    "jenna.j.ross",
    "anna.de.ville",
    "kylie.le.beau",
    "syren.de.mer",
    "rae.lil.black",
}
# Performers credited under a single-segment name.
SPECIAL_CASES_ONE = {"Sybil", "Zaawaadi", "Lutro", "Kazumi"}

# Prefix stripped from the start of every file name.
_PREFIX = "Tushy."
# The one known title that contains a literal "&"; it is rewritten to "and".
_AMPERSAND_TITLE = "Tushy.18.07.15.Sybil.Oil.&.Anal.1080p.mp4"


def _parse_actors(segments):
    """Return ``(actors, next_index)`` for the dot-split file-name segments.

    Scanning starts at index 3 (segments 0-2 are the date) and never reads a
    name starting in the last two segments, which hold the resolution and the
    file extension. Names are joined by a standalone "&" segment; the first
    segment after a name that is not "&" ends the actor list. ``next_index``
    is where the title begins.
    """
    actors = []
    i = 3
    while i < len(segments) - 2:
        triple = segments[i:i + 3]
        if ".".join(triple).lower() in SPECIAL_CASES_THREE:
            # Three-part name; keep the capitalization found in the file name.
            actors.append(" ".join(triple))
            i += 3
        elif segments[i] in SPECIAL_CASES_ONE:
            actors.append(segments[i])
            i += 1
        else:
            # Default: a name is "First Last", i.e. two segments.
            actors.append(f"{segments[i]} {segments[i + 1]}")
            i += 2

        if i < len(segments):
            if segments[i] != "&":
                break  # no further "&": the actor list is complete
            i += 1  # skip the "&" and parse the next name
    return actors, i


def _format_line(line):
    """Convert one raw ``<file name> <size> <unit>`` line to the output format.

    Raises:
        IndexError: if the line has fewer than three whitespace-separated
            fields or the file name has fewer than three dot-separated
            segments.
    """
    if _AMPERSAND_TITLE in line:
        line = line.replace("&", "and")

    # split() with no argument tolerates tabs and repeated spaces between
    # fields, which split(' ') would turn into empty fields.
    fields = line.split()
    file_name, file_size, file_unit = fields[0], fields[1], fields[2]

    # Strip only the leading prefix; a "Tushy." inside the title must survive.
    if file_name.startswith(_PREFIX):
        file_name = file_name[len(_PREFIX):]

    segments = file_name.split(".")
    date = f"20{segments[0]}.{segments[1]}.{segments[2]}"

    actors, title_start = _parse_actors(segments)

    # Whatever lies between the actors and the last two segments is the title.
    movie_name = " ".join(segments[title_start:-2])
    if not movie_name:
        movie_name = "xxxx"  # placeholder for a file name without a title

    resolution = segments[-2]
    file_format = segments[-1]
    actor_str = ", ".join(actors)
    return (
        f"{date} {actor_str} - {movie_name}.{resolution}.{file_format} "
        f"{file_size} {file_unit}\n"
    )


def parse_tushy_file(input_file, output_file):
    """Reformat a raw file listing into ``date actors - title`` lines.

    Each non-blank input line is expected to look like
    ``Tushy.YY.MM.DD.<actors>.<title>.<resolution>.<ext> <size> <unit>`` and
    is written to *output_file* as
    ``20YY.MM.DD <actors> - <title>.<resolution>.<ext> <size> <unit>``.
    Blank lines are skipped.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the UTF-8 text file to (over)write.

    Raises:
        OSError: if either file cannot be opened.
        IndexError: if a non-blank line does not have the expected fields.
    """
    # Read everything before opening the output so that reading is finished
    # before any output is written.
    with open(input_file, 'r', encoding='utf-8') as src:
        lines = src.readlines()

    with open(output_file, 'w', encoding='utf-8') as dst:
        dst.writelines(_format_line(line) for line in lines if line.strip())


# Usage
input_file = "./input_files/tushy-raw.txt"  # replace with the path of your input text file
output_file = "./formatted/tushy-list.txt"  # name of the file the result is written to

if __name__ == "__main__":
    parse_tushy_file(input_file, output_file)