"""Reformat a BLACKED release listing into one normalized line per file.

Each input line is expected to look like ``<file name> <size> <unit>``, e.g.
``BLACKED.20.09.05.Brooklyn.Gray.1080p.mp4 4.0 GB``.  Each output line looks
like ``2020.09.05 Brooklyn Gray - blacked.1080p.mp4 4.0 GB``.

NOTE(review): the text this file was recovered from had lost its line breaks
and a span of code in the middle (the end of ``parse_actors2``, the whole of
``parse_actors`` and the opening lines of ``parse_blacked_file``).  The parts
marked "reconstructed" below are a best effort and must be checked against the
original script.
"""
import re
import sys

# Special cases: performer names that do not follow the default
# "two dot-separated tokens per name" rule.
SPECIAL_CASES_THREE = {
    "alex.h.banks",
    "jenna.j.ross",
    "anna.de.ville",
    "kylie.le.beau",
    "syren.de.mer",
    "rae.lil.black",
    "anna.claire.clouds",
    "mina.von.d",
    "victoria.rae.black",
    "Kagney.Linn.Karter",
    "ariana.van.x",
}
SPECIAL_CASES_ONE = {
    "sybil",
    "zaawaadi",
    "lutro",
    "kazumi",
    "goldie",
    "camille",
    "Trillium",
    "sinderella",
}

# Lookups lower-case the candidate, so the reference sets must be lower-cased
# too; otherwise mixed-case entries such as "Kagney.Linn.Karter" never match.
_SPECIAL_THREE_LOWER = frozenset(name.lower() for name in SPECIAL_CASES_THREE)
_SPECIAL_ONE_LOWER = frozenset(name.lower() for name in SPECIAL_CASES_ONE)

# Known file-name suffixes.  Order matters: longer suffixes come first so that
# the generic ".mp4" only matches when nothing more specific does.
SUFFIXES = [
    ".XXX.1080p.MP4-KTR.mp4.mp4",
    ".XXX.1080p.MP4-KTR.mp4",
    ".XXX1080p.MP4-KTR.mp4",
    ".1080p.MP4-XXX[XC].mp4",
    ".XXX.1080p.MP4-NBQ.mp4",
    ".XXX.1080p.mp4",
    ".1080p.mp4",
    ".[N1C].mp4",
    "[N1C].mp4",
    ".xxx.mp4",
    ".mp4",
]

# Whole-line rewrites for entries whose file name carries a scene title (or no
# performer at all) that the generic parser would mistake for performer names.
REPLACEMENT_MAP = {
    "BLACKED.20.09.05.Brooklyn.Gray.After.Work.1080p.mp4 4.0 GB":
        "BLACKED.20.09.05.Brooklyn.Gray.1080p.mp4 4.0 GB",
    "BLACKED.20.09.12.Alexis.Tae.Temptress.In.Law.1080p.mp4 4.0 GB":
        "BLACKED.20.09.12.Alexis.Tae.1080p.mp4 4.0 GB",
    "BLACKED.20.10.03.Gabbie.Carter.Pretty.Little.Liar.1080p.mp4 4.2 GB":
        "BLACKED.20.10.03.Gabbie.Carter.1080p.mp4 4.2 GB",
    "BLACKED.20.11.21.Adriana.Chechik.Kira.Noir.Lazy.Sunday.1080p.mp4 4.7 GB":
        "BLACKED.20.11.21.Adriana.Chechik.Kira.Noir.1080p.mp4 4.7 GB",
    "BLACKED.20.11.28.Chloe.Cherry.Too.Strong.1080p.mp4 4.4 GB":
        "BLACKED.20.11.28.Chloe.Cherry.1080p.mp4 4.4 GB",
    "BLACKED.20.05.14.first.time.BLACKED.compilation.mp4 3.2 GB":
        "BLACKED.20.05.14.unknown.mp4 3.2 GB",
}

_PREFIX = "BLACKED."
# Placeholder used when no performer can be extracted; matches the value the
# REPLACEMENT_MAP already uses for the compilation entry.
_UNKNOWN_ACTOR = "unknown"


def find_suffix(file_name):
    """Return the first entry of SUFFIXES that *file_name* ends with.

    The comparison is case-insensitive.  The suffix is returned exactly as it
    is spelled in SUFFIXES, or None when no known suffix matches.
    """
    lowered = file_name.lower()
    for suffix in SUFFIXES:
        if lowered.endswith(suffix.lower()):
            return suffix
    return None


def capitalize_name(name):
    """Capitalize every space-separated part of *name*."""
    return ' '.join([part.capitalize() for part in name.split(' ')])


def _parse_actor_group(tokens):
    """Group name tokens (with no "and" separator among them) into names."""
    actors = []
    i = 0
    while i < len(tokens):
        three_part_key = ".".join(tokens[i:i + 3]).lower()
        if three_part_key in _SPECIAL_THREE_LOWER:
            actors.append(" ".join(tokens[i:i + 3]))
            i += 3
        elif tokens[i].lower() in _SPECIAL_ONE_LOWER:
            actors.append(tokens[i])
            i += 1
        elif i + 1 < len(tokens):
            # NOTE(review): reconstructed -- the source was cut off right
            # after "elif i+1".  Default rule: a name is two tokens.
            actors.append(" ".join(tokens[i:i + 2]))
            i += 2
        else:
            # A single trailing token is kept as a one-word name.
            actors.append(tokens[i])
            i += 1
    return actors


def parse_actors2(actors_segment):
    """Split dot-separated name tokens into a list of performer names.

    Args:
        actors_segment: list of tokens, e.g. ``["Kira", "Noir", "Sybil"]``.

    Returns:
        A list of names with tokens joined by spaces.  Three-token and
        one-token special cases are recognised case-insensitively; any other
        name is assumed to be two tokens.  A token equal to "and" (any case)
        separates names and is dropped.  Empty tokens are ignored.
    """
    actors = []
    group = []
    for token in actors_segment:
        if not token:
            continue
        if token.lower() == "and":
            # Parse the names on each side of "and" independently.
            actors.extend(_parse_actor_group(group))
            group = []
        else:
            group.append(token)
    actors.extend(_parse_actor_group(group))
    return actors


def parse_actors(actors_segment):
    """Split dot-separated name tokens into a list of performer names.

    NOTE(review): reconstructed -- the original body of this function was
    lost; only its call sites survived.  It delegates to parse_actors2 so the
    script runs; confirm against the original implementation.
    """
    return parse_actors2(actors_segment)


def _format_line(line):
    """Return the formatted output line for one listing line.

    Returns None when the line does not contain a file name, a size and a
    unit, or when the file name does not start with three date fields.
    """
    line = line.strip()
    # NOTE(review): reconstructed -- REPLACEMENT_MAP was not referenced in the
    # surviving text, but its keys are whole input lines, so it is applied
    # here before parsing.
    line = REPLACEMENT_MAP.get(line, line)

    parts = line.split()
    if len(parts) < 3:
        return None
    # The last two fields are size and unit; everything before them is the
    # file name (pieces are concatenated, as in the original script).
    file_name = ''.join(parts[:-2])
    file_size = parts[-2]
    file_unit = parts[-1]

    # Strip only the leading "BLACKED." so a later occurrence inside the file
    # name is left alone.
    if file_name.startswith(_PREFIX):
        file_name = file_name[len(_PREFIX):]

    segments = file_name.split('.')
    if len(segments) < 3:
        return None
    date = f"20{segments[0]}.{segments[1]}.{segments[2]}"

    suffix = find_suffix(file_name)
    if suffix:
        # find_suffix matched at the end, so cut by length instead of
        # searching for the first occurrence of the suffix text.
        stem = file_name[:len(file_name) - len(suffix)]
        actors_segment = stem.split('.')[3:]
    else:
        actors_segment = segments[3:]

    actors = parse_actors(actors_segment)

    movie_name_suffix = f"blacked{suffix}" if suffix else "blacked.unknown"

    actor_str = ', '.join(actors) if actors else _UNKNOWN_ACTOR
    return f"{date} {actor_str} - {movie_name_suffix} {file_size} {file_unit}\n"


def parse_blacked_file(input_file, output_file):
    """Read the listing at *input_file* and write formatted lines to *output_file*.

    Blank lines are skipped.  Lines that cannot be parsed are reported on
    stderr and skipped instead of aborting the whole run.

    NOTE(review): the original file-opening code was lost; UTF-8 text mode is
    an assumption.
    """
    with open(input_file, 'r', encoding='utf-8') as source, \
            open(output_file, 'w', encoding='utf-8') as f:
        for line_number, line in enumerate(source, start=1):
            if not line.strip():
                continue
            formatted_line = _format_line(line)
            if formatted_line is None:
                print(
                    f"{input_file}:{line_number}: skipped unparsable line: "
                    f"{line.strip()!r}",
                    file=sys.stderr,
                )
                continue
            f.write(formatted_line)


# Usage
input_file = "./input_files/blacked-all.txt"  # replace with the path to your input text file
output_file = "./formatted/blacked-list.txt"  # name of the output file

if __name__ == "__main__":
    parse_blacked_file(input_file, output_file)