modify some scripts.

This commit is contained in:
2025-03-03 19:01:41 +08:00
parent 8fd48687fc
commit f1e5abd6b3
10 changed files with 1642 additions and 0 deletions

92
scripts/iafd/src/utils.py Normal file
View File

@ -0,0 +1,92 @@
import re
import os
import json
import time
import csv
import logging
# Parse height and weight strings and convert them to numbers.
def parse_height(height_str):
    """Return the centimetre value embedded in ``height_str`` as an int.

    The number directly preceding ``cm`` is used, e.g.
    ``"5 feet, 6 inches (168 cm)"`` -> ``168``.

    Args:
        height_str: Text containing a ``<digits> cm`` fragment.

    Returns:
        The parsed integer, or ``0`` when ``height_str`` is not a string or
        contains no ``<digits> cm`` fragment.

    NOTE(review): an unconditional ``return 0`` used to sit in front of the
    parsing code, making it unreachable. ``0`` is kept as the "unknown"
    value so callers still always receive an int -- confirm that callers
    are ready to receive real values.
    """
    try:
        found = re.search(r"(\d+)\s*cm\b", height_str)
    except TypeError:
        # Non-string input such as None: treat as "unknown".
        return 0
    return int(found.group(1)) if found else 0
def parse_weight(weight_str):
    """Return the leading number of ``weight_str`` as an int.

    Only the first integer at the start of the string is read, e.g.
    ``"125 lbs (57 kg)"`` -> ``125``. No unit conversion is performed
    (presumably the leading figure is pounds -- TODO confirm against the
    scraped format).

    Args:
        weight_str: Text that starts with an integer.

    Returns:
        The parsed integer, or ``0`` when ``weight_str`` is not a string or
        does not start with digits.

    NOTE(review): an unconditional ``return 0`` used to sit in front of the
    parsing code, making it unreachable. ``0`` is kept as the "unknown"
    value so callers still always receive an int -- confirm that callers
    are ready to receive real values.
    """
    try:
        found = re.match(r"\s*(\d+)", weight_str)
    except TypeError:
        # Non-string input such as None: treat as "unknown".
        return 0
    return int(found.group(1)) if found else 0
# Root of the result tree. The path is relative, so it resolves against the
# current working directory of the running process.
update_dir = '../result'
# Base directory passed to create_sub_directory by write_person_json.
performers_dir = f'{update_dir}/performers'
# Base directory passed to create_sub_directory by write_movie_json.
movies_dir = f'{update_dir}/movies'
def uniq_performers(new_performers):
    """Return ``new_performers`` with duplicate ``href`` entries removed.

    The first item seen for each ``href`` is kept and the input order is
    preserved. The input list itself is not modified.

    Args:
        new_performers: A list of items that each expose a non-None
            ``item["href"]``.

    Returns:
        A new list holding the unique items. An empty list is returned
        (and the error is logged) when the argument is not a list, or when
        any item is empty, has no ``"href"`` entry, or has ``href`` set to
        ``None`` -- one bad item discards the whole batch.
    """
    try:
        if not isinstance(new_performers, list):
            raise TypeError(f"new_performers should be a list, but got {type(new_performers)}")
        seen = set()
        unique_performers = []
        for item in new_performers:
            if not item or item['href'] is None:
                raise ValueError(f"Invalid item in new_performers: {item}")
            href = item["href"]
            if href not in seen:
                seen.add(href)
                unique_performers.append(item)
        return unique_performers
    except Exception as e:
        # Deliberately broad: this helper is best-effort and must never
        # propagate an error to the caller.
        logging.error(f"Error in uniq_performers: {e}")
        return []  # return an empty list so the program does not crash
# Create the bucket sub-directory.
def create_sub_directory(base_dir, str):
    """Ensure the bucket directory for ``str`` exists and return its path.

    The bucket name is the first character of ``str``, lower-cased. Missing
    parent directories are created as well.

    Args:
        base_dir: Directory under which the bucket is created.
        str: Name whose first character selects the bucket. (The parameter
            shadows the builtin ``str``; the name is kept so keyword
            callers keep working.)

    Returns:
        ``os.path.join(base_dir, <bucket>)``.

    Raises:
        OSError: If the directory cannot be created.
    """
    # Only the first character is used as the bucket name.
    sub_dir = str[:1].lower()
    full_path = os.path.join(base_dir, sub_dir)
    # exist_ok=True replaces the exists()/makedirs() pair, which could fail
    # if the directory appeared between the check and the creation.
    os.makedirs(full_path, exist_ok=True)
    return full_path
# Extract the value of id from a URL such as
# https://www.iafd.com/person.rme/id=21898a3c-1ddd-4793-8d93-375d6db20586
def extract_id_from_href(href):
    """Return the ``id`` value found in ``href``, or ``''`` if there is none.

    The value is the run of lowercase hex digits and dashes that follows
    the first ``id=`` in the string.
    """
    found = re.search(r'id=([a-f0-9\-]+)', href)
    if not found:
        return ''
    return found.group(1)
# Write each performer to its own JSON file.
def write_person_json(person, href, data):
    """Dump ``data`` as JSON into the performer's own file.

    The file lives in the bucket directory that ``create_sub_directory``
    returns for ``performers_dir`` and ``person``. Its name is ``person``
    with spaces replaced by dashes, followed by the id extracted from
    ``href`` in parentheses and the ``.json`` suffix.

    A failure while opening or writing the file is logged and swallowed;
    the function returns nothing.
    """
    target_dir = create_sub_directory(performers_dir, person)
    person_id = extract_id_from_href(href)
    full_path = os.path.join(
        target_dir,
        f"{person.replace(' ', '-')}({person_id}).json",  # spaces become dashes
    )
    try:
        with open(full_path, 'w', encoding='utf-8') as out:
            json.dump(data, out, indent=4, ensure_ascii=False)
    except Exception as e:
        logging.error(f"Error writing file {full_path}: {e}")
# Write each movie to its own JSON file.
def write_movie_json(href, data):
    """Dump ``data`` as JSON into ``<movie id>.json``.

    The movie id is extracted from ``href``; the file is placed in the
    bucket directory that ``create_sub_directory`` returns for
    ``movies_dir`` and that id.

    Args:
        href: URL of the movie; must contain an ``id=`` value.
        data: JSON-serialisable payload.

    Returns:
        None. If ``href`` carries no id, or opening/writing the file fails,
        the error is logged and nothing is raised.
    """
    movie_id = extract_id_from_href(href)
    if not movie_id:
        # With an empty id every such movie would be written to the same
        # ".json" file, each write silently overwriting the previous one.
        logging.error(f"Error writing movie file: no id found in href {href!r}")
        return
    movie_dir = create_sub_directory(movies_dir, movie_id)
    full_path = os.path.join(movie_dir, f"{movie_id}.json")
    try:
        with open(full_path, 'w', encoding='utf-8') as json_file:
            json.dump(data, json_file, indent=4, ensure_ascii=False)
    except Exception as e:
        # Broad on purpose, matching write_person_json: a failed write is
        # logged and must not abort the caller.
        logging.error(f"Error writing file {full_path}: {e}")