import csv
import json
import logging
import os
import re
import time


def parse_height(height_str):
    """Return the numeric height for *height_str*; currently always ``0``.

    NOTE: parsing is disabled. The original body returned 0 unconditionally,
    ahead of an unreachable block that took the text after the last "(",
    removed the " cm)" suffix and converted the remainder with ``int()``
    (falling back to ``None`` on failure). The unconditional 0 is kept so
    callers observe no change; only the dead code was dropped.
    """
    return 0


def parse_weight(weight_str):
    """Return the numeric weight for *weight_str*; currently always ``0``.

    NOTE: parsing is disabled. The original body returned 0 unconditionally,
    ahead of an unreachable block that converted the first space-separated
    token with ``int()`` (falling back to ``None`` on failure). The
    unconditional 0 is kept so callers observe no change; only the dead code
    was dropped.
    """
    return 0


# Output locations, relative to the current working directory.
update_dir = '../result'
performers_dir = f'{update_dir}/performers'
movies_dir = f'{update_dir}/movies'


def to_number(value):
    """Convert *value* to a float, returning 0 when it cannot be converted."""
    try:
        return float(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: integers too large for a float (e.g. 10**400).
        return 0


def dist_stu_href_rewrite(href):
    """Build the canonical distributor/studio URL for *href*.

    Looks for ``distrib=<digits>`` or ``studio=<digits>`` anywhere in *href*
    and returns ``https://www.iafd.com/<key>.rme/<key>=<digits>``.
    Returns ``None`` when *href* contains neither pattern.
    """
    match = re.search(r"(distrib|studio)=(\d+)", href)
    if not match:
        return None  # Not a distributor/studio URL.

    key, id_number = match.groups()
    return f"https://www.iafd.com/{key}.rme/{key}={id_number}"


def create_sub_directory(base_dir, str):
    """Ensure the one-character bucket directory for *str* exists.

    The bucket name is the first character of *str*, lower-cased (an empty
    *str* maps to *base_dir* itself). Returns the bucket directory path.

    The second parameter shadows the builtin ``str``; the name is kept so
    existing keyword callers continue to work.
    """
    sub_dir = str[:1].lower()
    full_path = os.path.join(base_dir, sub_dir)
    if not os.path.exists(full_path):
        # exist_ok closes the check-then-create race when another writer
        # creates the same bucket between the check and this call. The
        # existence check is kept so that a non-directory already at this
        # path is left for the caller's open() to report, as before.
        os.makedirs(full_path, exist_ok=True)
    return full_path


def extract_id_from_href(href):
    """Return the value of the ``id=`` parameter in *href*, or ``''``.

    Example: ``https://www.iafd.com/person.rme/id=21898a3c-1ddd-4793-8d93-375d6db20586``
    yields ``21898a3c-1ddd-4793-8d93-375d6db20586``. Only lowercase hex
    digits and hyphens are accepted as part of the id.
    """
    match = re.search(r'id=([a-f0-9\-]+)', href)
    return match.group(1) if match else ''


def _dump_json(full_path, data):
    """Write *data* to *full_path* as indented UTF-8 JSON; log on failure."""
    try:
        with open(full_path, 'w', encoding='utf-8') as json_file:
            json.dump(data, json_file, indent=4, ensure_ascii=False)
    except Exception as e:
        # Best-effort write: one failing record must not abort the caller.
        logging.error("Error writing file %s: %s", full_path, e)


def write_person_json(person, href, data):
    """Write one performer's *data* to its own JSON file.

    The file is stored under ``performers_dir/<first letter of person>/`` and
    named ``<person with spaces replaced by '-'>(<id from href>).json``.
    Write failures are logged, not raised.

    NOTE(review): *person* is used verbatim in the file name, so a name
    containing a path separator will fail to write (and be logged).
    """
    person_dir = create_sub_directory(performers_dir, person)
    person_id = extract_id_from_href(href)
    person_filename = f"{person.replace(' ', '-')}({person_id}).json"
    _dump_json(os.path.join(person_dir, person_filename), data)


def write_movie_json(href, data):
    """Write one movie's *data* to its own JSON file.

    The file is stored under ``movies_dir/<first character of id>/`` and
    named ``<id from href>.json``. Write failures are logged, not raised.
    """
    movie_id = extract_id_from_href(href)
    movie_dir = create_sub_directory(movies_dir, movie_id)
    movie_filename = f"{movie_id}.json"
    _dump_json(os.path.join(movie_dir, movie_filename), data)


def read_json(file_path):
    """Load and return the JSON content of *file_path*.

    Returns ``None`` (after printing a message) when the file does not exist
    or does not contain valid JSON. Other I/O errors propagate.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"文件 {file_path} 未找到.")
        return None
    except json.JSONDecodeError:
        print(f"文件 {file_path} 解析错误.")
        return None