modify scripts
This commit is contained in:
162
vixen_group/blacked-format.py
Normal file
162
vixen_group/blacked-format.py
Normal file
@ -0,0 +1,162 @@
|
||||
import re
|
||||
|
||||
# Special-case performer names that do not follow the default two-token
# "first.last" pattern.
#
# Every entry MUST be written in lowercase: the parsers lowercase the
# candidate tokens before testing membership, so a mixed-case entry can never
# match.

# Names made of three dot-separated tokens.
SPECIAL_CASES_THREE = {
    "alex.h.banks",
    "jenna.j.ross",
    "anna.de.ville",
    "kylie.le.beau",
    "syren.de.mer",
    "rae.lil.black",
    "anna.claire.clouds",
    "mina.von.d",
    "victoria.rae.black",
    "kagney.linn.karter",
    "ariana.van.x",
}

# Names made of a single token.
SPECIAL_CASES_ONE = {
    "sybil",
    "zaawaadi",
    "lutro",
    "kazumi",
    "goldie",
    "camille",
    "trillium",
    "sinderella",
}
|
||||
|
||||
# Known file-name endings (release tags, resolution markers, extension).
# find_suffix() returns the FIRST entry the name ends with, so the order is
# significant: longer endings come before the shorter ones they themselves
# end with (".mp4" must stay last).
SUFFIXES = [
    # release-group tagged endings
    ".XXX.1080p.MP4-KTR.mp4.mp4",
    ".XXX.1080p.MP4-KTR.mp4",
    ".XXX1080p.MP4-KTR.mp4",
    ".1080p.MP4-XXX[XC].mp4",
    ".XXX.1080p.MP4-NBQ.mp4",
    # resolution-only endings
    ".XXX.1080p.mp4",
    ".1080p.mp4",
    # "[N1C]" tag, with and without the separating dot
    ".[N1C].mp4",
    "[N1C].mp4",
    # generic fallbacks
    ".xxx.mp4",
    ".mp4",
]
|
||||
|
||||
|
||||
# Manual corrections for input lines the generic parser cannot handle.
# Keys and values are complete input lines ("<file name> <size> <unit>");
# the stripped raw line is looked up here before any other processing and,
# when present, the mapped line is parsed instead. In the entries below the
# replacement drops the extra words that follow the performer names.
REPLACEMENT_MAP = {
    "BLACKED.20.09.05.Brooklyn.Gray.After.Work.1080p.mp4 4.0 GB":
        "BLACKED.20.09.05.Brooklyn.Gray.1080p.mp4 4.0 GB",
    "BLACKED.20.09.12.Alexis.Tae.Temptress.In.Law.1080p.mp4 4.0 GB":
        "BLACKED.20.09.12.Alexis.Tae.1080p.mp4 4.0 GB",
    "BLACKED.20.10.03.Gabbie.Carter.Pretty.Little.Liar.1080p.mp4 4.2 GB":
        "BLACKED.20.10.03.Gabbie.Carter.1080p.mp4 4.2 GB",
    "BLACKED.20.11.21.Adriana.Chechik.Kira.Noir.Lazy.Sunday.1080p.mp4 4.7 GB":
        "BLACKED.20.11.21.Adriana.Chechik.Kira.Noir.1080p.mp4 4.7 GB",
    "BLACKED.20.11.28.Chloe.Cherry.Too.Strong.1080p.mp4 4.4 GB":
        "BLACKED.20.11.28.Chloe.Cherry.1080p.mp4 4.4 GB",
    "BLACKED.20.05.14.first.time.BLACKED.compilation.mp4 3.2 GB":
        "BLACKED.20.05.14.unknown.mp4 3.2 GB",
}
|
||||
|
||||
def find_suffix(file_name):
    """Return the first entry of ``SUFFIXES`` that ``file_name`` ends with.

    The comparison ignores case. The entry is returned exactly as it is
    spelled in ``SUFFIXES``; ``None`` is returned when nothing matches.
    """
    lowered_name = file_name.lower()
    matching = (
        candidate
        for candidate in SUFFIXES
        if lowered_name.endswith(candidate.lower())
    )
    return next(matching, None)
|
||||
|
||||
|
||||
def capitalize_name(name):
    """Capitalize every space-separated word of ``name``.

    Each word goes through ``str.capitalize``, i.e. its first character is
    upper-cased and the remaining characters are lower-cased. Words are split
    on single spaces, so runs of spaces are preserved.
    """
    words = name.split(' ')
    return ' '.join(map(str.capitalize, words))
|
||||
|
||||
def parse_actors2(actors_segment):
    """Group dot-split file-name tokens into performer names, keeping case.

    Unlike ``parse_actors``, the names are returned with the tokens' original
    casing (``capitalize_name`` is not applied).

    Args:
        actors_segment: list of string tokens taken from the file name.

    Returns:
        A list of names. Tokens are consumed left to right:

        * a standalone "and" token (any casing) is a separator: the tokens on
          each side are parsed independently and the results concatenated;
        * three tokens listed in ``SPECIAL_CASES_THREE`` form one name;
        * one token listed in ``SPECIAL_CASES_ONE`` forms one name;
        * otherwise two consecutive tokens form one name;
        * a single leftover token is kept as a name of its own.
    """
    # The earlier check for ".And." inside a space-joined, lowercased string
    # could never be true, so "and" was never treated as a separator. Split on
    # the token itself instead.
    for position, token in enumerate(actors_segment):
        if token.lower() == "and":
            return (
                parse_actors2(actors_segment[:position])
                + parse_actors2(actors_segment[position + 1:])
            )

    actors = []
    total = len(actors_segment)
    i = 0
    while i < total:
        window = actors_segment[i:i + 3]
        if ".".join(window).lower() in SPECIAL_CASES_THREE:
            actors.append(" ".join(window))
            i += 3
        elif actors_segment[i].lower() in SPECIAL_CASES_ONE:
            actors.append(actors_segment[i])
            i += 1
        elif i + 1 < total:
            actors.append(actors_segment[i] + ' ' + actors_segment[i + 1])
            i += 2
        else:
            # Keep a trailing single token instead of silently dropping it.
            actors.append(actors_segment[i])
            i += 1
    return actors
|
||||
|
||||
def parse_actors(actors_segment):
    """Group dot-split file-name tokens into capitalized performer names.

    Args:
        actors_segment: list of string tokens taken from the file name.

    Returns:
        A list of names. When a standalone "and" token (any casing) is
        present, the tokens before and after the first one are parsed
        recursively and the two results are concatenated. Otherwise tokens
        are consumed left to right: three tokens listed in
        ``SPECIAL_CASES_THREE`` form one name, one token listed in
        ``SPECIAL_CASES_ONE`` forms one name, and any other two consecutive
        tokens form one name; these are passed through ``capitalize_name``.
        A single leftover token is appended unchanged.
    """
    # Position of the first standalone "and" token, or None when absent.
    split_at = next(
        (pos for pos, token in enumerate(actors_segment) if token.lower() == "and"),
        None,
    )
    if split_at is not None:
        before = parse_actors(actors_segment[:split_at])
        after = parse_actors(actors_segment[split_at + 1:])
        return before + after

    names = []
    count = len(actors_segment)
    cursor = 0
    while cursor < count:
        triple = actors_segment[cursor:cursor + 3]
        if ".".join(triple).lower() in SPECIAL_CASES_THREE:
            names.append(capitalize_name(" ".join(triple)))
            cursor += 3
            continue

        head = actors_segment[cursor]
        if head.lower() in SPECIAL_CASES_ONE:
            names.append(capitalize_name(head))
            cursor += 1
        elif cursor + 1 < count:
            pair = head + ' ' + actors_segment[cursor + 1]
            names.append(capitalize_name(pair))
            cursor += 2
        else:
            # Lone trailing token: stored as-is, without capitalization.
            names.append(head)
            cursor += 1
    return names
|
||||
|
||||
def _format_blacked_line(line):
    """Turn one raw listing line into its formatted output line.

    Returns the formatted line (terminated by a newline), or ``None`` when
    ``line`` is blank and must be skipped.
    """
    if not line.strip():
        return None

    # Pre-processing: swap known problem lines for their manual correction.
    line = REPLACEMENT_MAP.get(line.strip(), line)

    # Pre-processing: '&' becomes the "And" token understood by parse_actors.
    line = line.replace('&', 'And')

    # The last two space-separated fields are the size and its unit; all
    # fields before them belong to the file name and are glued back together
    # without the stray spaces.
    parts = line.strip().split(' ')
    file_name = ''.join(parts[:-2])
    file_size = parts[-2]
    file_unit = parts[-1]

    # Drop the "BLACKED." studio tag.
    file_name = file_name.replace("BLACKED.", "")

    # The first three dot-separated segments are the date (YY.MM.DD).
    segments = file_name.split('.')
    date = f"20{segments[0]}.{segments[1]}.{segments[2]}"

    suffix = find_suffix(file_name)
    if suffix:
        # find_suffix() guarantees the name ENDS with the suffix, so cut it
        # off by length; searching for its first occurrence could hit an
        # earlier match inside the name.
        stem = file_name[:len(file_name) - len(suffix)]
        actors_segment = stem.split('.')[3:]
    else:
        actors_segment = segments[3:]
    # Consecutive dots produce empty tokens that cannot be part of a name.
    actors_segment = [token for token in actors_segment if token]

    actors = parse_actors(actors_segment)

    movie_name_suffix = f"blacked{suffix}" if suffix else "blacked.unknown"

    # No recognisable performer at all: fall back to "unknown" rather than
    # failing on an empty list.
    actor_str = ', '.join(actors) or "unknown"
    return f"{date} {actor_str} - {movie_name_suffix} {file_size} {file_unit}\n"


def parse_blacked_file(input_file, output_file):
    """Reformat a listing of BLACKED file names into "date performers" lines.

    Every non-blank line of ``input_file`` is expected to look like
    ``BLACKED.YY.MM.DD.<names><suffix> <size> <unit>`` and is written to
    ``output_file`` as
    ``20YY.MM.DD <names> - blacked<suffix> <size> <unit>``.
    Blank lines are skipped.

    Args:
        input_file: path of the UTF-8 text file to read.
        output_file: path of the UTF-8 text file to (over)write.

    Raises:
        IndexError: if a non-blank line lacks the size/unit fields or the
            three date segments.
        OSError: if either file cannot be opened.
    """
    # Read everything first so the input is fully consumed before the output
    # file is opened for writing.
    with open(input_file, 'r', encoding='utf-8') as source:
        lines = source.readlines()

    with open(output_file, 'w', encoding='utf-8') as target:
        for line in lines:
            formatted_line = _format_blacked_line(line)
            if formatted_line is not None:
                target.write(formatted_line)
|
||||
|
||||
# Usage: adjust the two paths below, then run this script.
input_file = "./input_files/blacked-all.txt"  # path of the raw listing to read
output_file = "./formatted/blacked-list.txt"  # path the formatted list is written to
parse_blacked_file(input_file=input_file, output_file=output_file)
|
||||
Reference in New Issue
Block a user