101 lines
3.0 KiB
Python
101 lines
3.0 KiB
Python
import re
|
||
import os
|
||
import json
|
||
import time
|
||
import csv
|
||
import logging
|
||
|
||
# Height and weight parsers (both currently return 0 without parsing their input)
|
||
def parse_height(height_str):
    """Return the numeric height for *height_str*; currently always ``0``.

    Height parsing is switched off. The function used to return ``0``
    unconditionally *before* a ``try`` block that took the text after the
    last "(", stripped the " cm)" suffix and converted the rest with
    ``int`` (returning ``None`` on any error). That code could never run,
    so it has been dropped together with its bare ``except:``; the
    observable behaviour is unchanged.

    NOTE(review): confirm whether parsing was disabled on purpose before
    re-enabling it -- callers currently only ever receive ``0``.

    Args:
        height_str: Raw height text. Ignored while parsing is disabled.

    Returns:
        Always ``0``.
    """
    return 0
|
||
|
||
def parse_weight(weight_str):
    """Return the numeric weight for *weight_str*; currently always ``0``.

    Weight parsing is switched off. The function used to return ``0``
    unconditionally *before* a ``try`` block that converted the first
    space-separated token of the text with ``int`` (returning ``None`` on
    any error). That code could never run, so it has been dropped together
    with its bare ``except:``; the observable behaviour is unchanged.

    NOTE(review): confirm whether parsing was disabled on purpose before
    re-enabling it -- callers currently only ever receive ``0``.

    Args:
        weight_str: Raw weight text. Ignored while parsing is disabled.

    Returns:
        Always ``0``.
    """
    return 0
|
||
|
||
# Root of the output tree (a relative path, so it is resolved against the
# process's current working directory when used).
update_dir = '../result'
# Category sub-directories below the output root.
performers_dir = update_dir + '/performers'
movies_dir = update_dir + '/movies'
|
||
|
||
def to_number(value):
    """Convert *value* to a float, falling back to 0 when it is not numeric.

    Args:
        value: Anything ``float()`` accepts, typically a string.

    Returns:
        ``float(value)`` on success; the integer ``0`` when ``float()``
        raises ``ValueError`` or ``TypeError`` (for example non-numeric
        text or ``None``).
    """
    try:
        number = float(value)
    except (ValueError, TypeError):
        number = 0
    return number
|
||
|
||
def dist_stu_href_rewrite(href):
    """Build the canonical iafd.com URL for a distributor or studio link.

    Searches *href* for the first ``distrib=<digits>`` or
    ``studio=<digits>`` fragment and rebuilds the URL from the matched
    keyword and number alone, discarding everything else in *href*.

    Args:
        href: URL or path text to inspect.

    Returns:
        ``https://www.iafd.com/<key>.rme/<key>=<digits>``, or ``None``
        when *href* contains neither fragment.
    """
    found = re.search(r"(distrib|studio)=(\d+)", href)
    if found is None:
        # Not a distributor/studio link.
        return None

    key, id_number = found.group(1, 2)
    return f"https://www.iafd.com/{key}.rme/{key}={id_number}"
|
||
|
||
# 创建目录
|
||
def create_sub_directory(base_dir, str):
    """Ensure the one-character bucket directory for *str* exists.

    The bucket name is the first character of *str*, lower-cased. For an
    empty *str* the bucket name is empty, so the result is *base_dir*
    followed by a path separator.

    Args:
        base_dir: Directory under which the bucket is created (missing
            parent directories are created as well).
        str: Name used to pick the bucket. The parameter shadows the
            builtin ``str``; the name is kept so existing keyword callers
            keep working.

    Returns:
        The path of the bucket directory.

    Raises:
        FileExistsError: If the bucket path exists but is not a directory.
    """
    bucket = str[:1].lower()
    full_path = os.path.join(base_dir, bucket)
    # exist_ok=True replaces a separate os.path.exists() check, which
    # could race with another process creating the same directory
    # between the check and the makedirs() call.
    os.makedirs(full_path, exist_ok=True)
    return full_path
|
||
|
||
# 从 https://www.iafd.com/person.rme/id=21898a3c-1ddd-4793-8d93-375d6db20586 中抽取 id 的值
|
||
def extract_id_from_href(href):
    """Return the value following the first ``id=`` in *href*.

    The value is the longest run of lowercase hex digits and dashes
    directly after ``id=`` (e.g. the UUID in
    ``https://www.iafd.com/person.rme/id=21898a3c-1ddd-4793-8d93-375d6db20586``).

    Args:
        href: URL or path text to inspect.

    Returns:
        The matched id text, or ``''`` when no such ``id=`` value exists.
    """
    found = re.search(r'id=([a-f0-9\-]+)', href)
    if found:
        return found.group(1)
    return ''
|
||
|
||
# 写入每个 performer 的单独 JSON 文件
|
||
def write_person_json(person, href, data):
    """Dump *data* as JSON into the per-performer file for *person*.

    The file lives in the directory returned by
    ``create_sub_directory(performers_dir, person)`` and is named
    ``<person with spaces replaced by '-'>(<id>).json``, where ``<id>`` is
    ``extract_id_from_href(href)``.

    Args:
        person: Performer name; selects the bucket directory and forms
            the file name.
        href: Link from which the performer id is extracted.
        data: JSON-serialisable object to write.

    Returns:
        None. Any exception raised while opening or writing the file is
        logged with ``logging.error`` and not re-raised.
    """
    target_dir = create_sub_directory(performers_dir, person)
    person_id = extract_id_from_href(href)
    # Spaces in the performer name become dashes in the file name.
    file_name = f"{person.replace(' ', '-')}({person_id}).json"
    full_path = os.path.join(target_dir, file_name)

    try:
        with open(full_path, 'w', encoding='utf-8') as out:
            json.dump(data, out, indent=4, ensure_ascii=False)
    except Exception as e:
        logging.error(f"Error writing file {full_path}: {e}")
|
||
|
||
|
||
# Write each movie to its own JSON file
|
||
def write_movie_json(href, data):
    """Dump *data* as JSON into the per-movie file identified by *href*.

    The movie id is ``extract_id_from_href(href)``. The file is named
    ``<movie_id>.json`` and lives in the directory returned by
    ``create_sub_directory(movies_dir, movie_id)``.

    Args:
        href: Link from which the movie id is extracted.
        data: JSON-serialisable object to write.

    Returns:
        None. If no id can be extracted, or if opening/writing the file
        raises, the problem is logged with ``logging.error`` and nothing
        is raised.
    """
    movie_id = extract_id_from_href(href)
    if not movie_id:
        # An empty id would map every such movie onto the same ".json"
        # file directly under movies_dir, each write overwriting the last.
        logging.error(f"Cannot write movie JSON: no id found in href {href!r}")
        return

    movie_dir = create_sub_directory(movies_dir, movie_id)
    movie_filename = f"{movie_id}.json"
    full_path = os.path.join(movie_dir, movie_filename)

    try:
        with open(full_path, 'w', encoding='utf-8') as json_file:
            json.dump(data, json_file, indent=4, ensure_ascii=False)
    except Exception as e:
        logging.error(f"Error writing file {full_path}: {e}")
|
||
|
||
|
||
# 读取json文件并返回内容
|
||
def read_json(file_path):
    """Load and return the JSON document stored at *file_path*.

    Args:
        file_path: Path of a UTF-8 encoded JSON file.

    Returns:
        The decoded JSON value, or ``None`` when the file does not exist
        or its content is not valid JSON. In both failure cases a message
        is printed to standard output.
    """
    parsed = None
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            parsed = json.load(source)
    except FileNotFoundError:
        print(f"文件 {file_path} 未找到.")
    except json.JSONDecodeError:
        print(f"文件 {file_path} 解析错误.")
    return parsed
|