modify scripts
108  javhd/list_fetch.py  Normal file
@@ -0,0 +1,108 @@
"""
Script Name: list_fetch.py
Description: Fetch the model list from javhd.com, then fetch each model's details.
    list_fetch.py pulls the list from the site and writes the results to local JSON
    files; the ja, zh and en languages are supported (normally you fetch all three
    once each).
    list_format.py reads those files back and merges them into one complete list,
    mainly joining each model's three language names; the normalized link URL plus
    the thumbnail URL are used as the evidence that two entries are the same person.
    model_fetch.py takes the merged list, reads each detail page, and merges in the
    extra details.
    Note: the HEADERS section was captured from a browser and may need to be
    replaced after a while.

Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0

Modification History:
    - YYYY-MM-DD [Your Name]:
    - YYYY-MM-DD [Your Name]:
    - YYYY-MM-DD [Your Name]:
"""

import requests
import time
import json
import sys
from urllib.parse import urljoin, urlparse


# Initial URLs
BASE_URL = "https://javhd.com"
#START_URL = "/ja/model"
#START_URL = "/zh/model"
START_URL = "/en/model"

# Captured from a browser session; the cookie in particular may expire.
# (Duplicate content-type / user-agent / x-requested-with keys removed.)
HEADERS = {
    "accept": "application/json, text/plain, */*",
    "content-type": "application/json",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "cookie": "adult-warning-popup=disabled; st_d=%7B%7D; feid=c18cd2f2cf5c034d120e5975558acc8c; xfeid=3b040b0aecba9d3df41f21732480d947; _ym_uid=1739069925634817268; _ym_d=1739069925; atas_uid=; _clck=1cd9xpy%7C2%7Cftb%7C0%7C1866; _ym_isad=2; nats=ODY0LjIuMi4yNi4yMzQuMC4wLjAuMA; nats_cookie=https%253A%252F%252Fcn.pornhub.com%252F; nats_unique=ODY0LjIuMi4yNi4yMzQuMC4wLjAuMA; nats_sess=480e7410e649efce6003c3add587a579; nats_landing=No%2BLanding%2BPage%2BURL; JAVSESSID=n42hnvj3ecr0r6tadusladpk3h; user_lang=zh; locale=ja; utm=%7B%22ads_type%22%3A%22%22%7D; sid=3679b28ec523df85ec4e7739e32f2008; _ym_visorc=w; feid_sa=62; sid_sa=2",
    "origin": "https://javhd.com",
    "priority": "u=1, i",
    "referer": "https://javhd.com/ja/model",
    "sec-ch-ua": '"Not A(Brand";v="8", "Chromium";v="132", "Microsoft Edge";v="132"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"macOS"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",
    "x-requested-with": "XMLHttpRequest",
}
POST_DATA = {}  # an empty dict means no request payload


def sanitize_filename(url_path):
    """Turn a URL path into a legal local filename."""
    return url_path.strip("/").replace("/", "_") + ".json"


def fetch_data(url, retries=3):
    """Fetch JSON data from the given URL, with a retry mechanism."""
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=HEADERS, json=POST_DATA, timeout=10)
            print(response)  # debug: show the HTTP status
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"[ERROR] Request failed {url}: {e}, retry {attempt + 1}/{retries}")
            time.sleep(2)
    return None


def save_data(url, data):
    """Save the fetched data to a file named after the URL path."""
    parsed_url = urlparse(url)
    filename = sanitize_filename(parsed_url.path)
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    print(f"[OK] Data saved to {filename}")


def main(s_url):
    current_url = urljoin(BASE_URL, s_url)
    while current_url:
        print(f"[INFO] Fetching {current_url}")
        data = fetch_data(current_url)

        if not data:
            print(f"[ERROR] Could not fetch data from {current_url}")
            break

        # Sanity-check the JSON structure
        if not all(key in data for key in ["status", "results_count", "pagination_params", "template"]):
            print(f"[ERROR] Unexpected data structure: {data}")
            break

        save_data(current_url, data)

        # Follow the next page, if any
        next_path = data.get("pagination_params", {}).get("next")
        if next_path:
            current_url = urljoin(BASE_URL, next_path)
        else:
            print("[INFO] All pages fetched.")
            break


if __name__ == "__main__":
    s_url = "/ja/model"
    if len(sys.argv) >= 2:
        s_url = f"/{sys.argv[1]}/model"
    main(s_url)
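
The docstring suggests running the fetch once per language. A minimal driver sketch for that, hypothetical in that it assumes list_fetch.py sits in the current directory and the interpreter is invoked as "python":

# run_all_langs.py (illustrative helper, not part of the commit)
import subprocess

for lang in ("ja", "zh", "en"):
    # list_fetch.py takes the language code as its first argument
    subprocess.run(["python", "list_fetch.py", lang], check=True)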

119  javhd/list_format.py  Normal file
@@ -0,0 +1,119 @@
"""
Script Name: list_format.py
Description: Fetch the model list from javhd.com, then fetch each model's details.
    list_fetch.py pulls the list from the site and writes the results to local JSON
    files; the ja, zh and en languages are supported (normally you fetch all three
    once each).
    list_format.py reads those files back and merges them into one complete list,
    mainly joining each model's three language names; the normalized link URL plus
    the thumbnail URL are used as the evidence that two entries are the same person.
    model_fetch.py takes the merged list, reads each detail page, and merges in the
    extra details.
    Note: the HEADERS section was captured from a browser and may need to be
    replaced after a while.

Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0

Modification History:
    - YYYY-MM-DD [Your Name]:
    - YYYY-MM-DD [Your Name]:
    - YYYY-MM-DD [Your Name]:
"""

import os
import re
import json
import glob
import logging
import csv


# Logging setup
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Result directories and output files
RESULT_DIR = "result"
RESULT_TMP_DIR = f'{RESULT_DIR}/tmp'
OUTPUT_JSON = os.path.join(RESULT_DIR, "models.json")
OUTPUT_CSV = os.path.join(RESULT_DIR, "models.csv")

# First-page files may have been saved without a page number; rename them
LANGS = ["ja", "en", "zh"]
for lang in LANGS:
    old_file = os.path.join(RESULT_TMP_DIR, f"{lang}_model.json")
    new_file = os.path.join(RESULT_TMP_DIR, f"{lang}_model_popular_1.json")
    if os.path.exists(old_file):
        logging.info(f"Renaming {old_file} to {new_file}")
        os.rename(old_file, new_file)

# Collect all matching JSON files
file_paths = sorted(glob.glob(os.path.join(RESULT_TMP_DIR, "*_model_popular_*.json")))
pattern = re.compile(r'(\w+)_model_popular_(\d+)\.json')


def normalize_url(url):
    """Strip the en/ja/zh language segment from a URL."""
    return re.sub(r'/(en|ja|zh)/', '/', url)


# Main processing
def main_process():
    models = {}

    for file_path in file_paths:
        match = pattern.search(os.path.basename(file_path))
        if not match:
            continue

        lang, num = match.groups()
        num = int(num)

        logging.info(f"Processing {file_path} (lang={lang}, num={num})")

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except Exception as e:
            logging.error(f"Failed to load {file_path}: {e}")
            continue

        template = data.get("template", "")
        thumb_components = re.findall(r'<thumb-component[^>]*>', template)

        for idx, thumb in enumerate(thumb_components, start=1):
            # 36 thumbnails per page, so the global rank follows from the page number
            rank = (num - 1) * 36 + idx

            link_content = re.search(r'link-content="(.*?)"', thumb)
            url_thumb = re.search(r'url-thumb="(.*?)"', thumb)
            title = re.search(r'title="(.*?)"', thumb)

            # Skip thumbnails missing any attribute we key on
            # (link_content is also checked, since it is dereferenced below)
            if not link_content or not url_thumb or not title:
                logging.info(f"no content for rank:{rank} title:{title} url:{url_thumb} {thumb}")
                continue

            pic = url_thumb.group(1)
            name = title.group(1)
            url = link_content.group(1) if lang == "en" else ""
            norm_url = normalize_url(link_content.group(1))

            # The thumbnail URL plus the normalized link identify one person
            key = (pic, norm_url)
            if key not in models:
                models[key] = {"rank": rank, "ja_name": "", "zh_name": "", "en_name": "", "url": url, "pic": pic}

            models[key][f"{lang}_name"] = name
            if lang == "en" and url:
                models[key]["url"] = url

    # Sort by rank, then write JSON
    sorted_models = sorted(models.values(), key=lambda x: x["rank"])

    with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(sorted_models, f, indent=4, ensure_ascii=False)
    logging.info(f"Saved JSON output to {OUTPUT_JSON}")

    # Write the CSV version
    headers = ["rank", "ja_name", "zh_name", "en_name", "url", "pic"]
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=headers)
        writer.writeheader()
        writer.writerows(sorted_models)
    logging.info(f"Saved CSV output to {OUTPUT_CSV}")


if __name__ == '__main__':
    main_process()
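
As a quick illustration of the dedup key: the language segment is the only part that differs between the three per-language links for one model, so stripping it collapses them onto one record. A self-contained sanity check, using a made-up path:

# Illustrative only: the profile path below is invented.
import re

def normalize_url(url):
    return re.sub(r'/(en|ja|zh)/', '/', url)

links = [f"https://javhd.com/{lang}/model/example" for lang in ("en", "ja", "zh")]
# All three variants normalize to the same key component.
assert len({normalize_url(u) for u in links}) == 1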

176  javhd/model_fetch.py  Normal file
@@ -0,0 +1,176 @@
"""
Script Name: model_fetch.py
Description: Fetch the model list from javhd.com, then fetch each model's details.
    list_fetch.py pulls the list from the site and writes the results to local JSON
    files; the ja, zh and en languages are supported (normally you fetch all three
    once each).
    list_format.py reads those files back and merges them into one complete list,
    mainly joining each model's three language names; the normalized link URL plus
    the thumbnail URL are used as the evidence that two entries are the same person.
    model_fetch.py takes the merged list, reads each detail page, and merges in the
    extra details.
    Note: the HEADERS section was captured from a browser and may need to be
    replaced after a while.

Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0

Modification History:
    - YYYY-MM-DD [Your Name]:
    - YYYY-MM-DD [Your Name]:
    - YYYY-MM-DD [Your Name]:
"""

import json
import csv
import requests
import time
import logging
import os
from bs4 import BeautifulSoup

# Logging setup
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# File paths
INPUT_FILE = "result/models.json"
OUTPUT_JSON = "result/javhd_models.json"
OUTPUT_CSV = "result/javhd_models.csv"

# Captured from a browser session; the cookie in particular may expire.
HEADERS = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "cookie": "adult-warning-popup=disabled; st_d=%7B%7D; feid=c18cd2f2cf5c034d120e5975558acc8c; xfeid=3b040b0aecba9d3df41f21732480d947; _ym_uid=1739069925634817268; _ym_d=1739069925; atas_uid=; _clck=1cd9xpy%7C2%7Cftb%7C0%7C1866; _ym_isad=2; nats=ODY0LjIuMi4yNi4yMzQuMC4wLjAuMA; nats_cookie=https%253A%252F%252Fcn.pornhub.com%252F; nats_unique=ODY0LjIuMi4yNi4yMzQuMC4wLjAuMA; nats_sess=480e7410e649efce6003c3add587a579; nats_landing=No%2BLanding%2BPage%2BURL; JAVSESSID=n42hnvj3ecr0r6tadusladpk3h; user_lang=zh; locale=ja; utm=%7B%22ads_type%22%3A%22%22%7D; sid=3679b28ec523df85ec4e7739e32f2008; _ym_visorc=w; feid_sa=62; sid_sa=2",
    "origin": "https://javhd.com",
    "priority": "u=1, i",
    "referer": "https://javhd.com/ja/model",
    "sec-ch-ua": '"Not A(Brand";v="8", "Chromium";v="132", "Microsoft Edge";v="132"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"macOS"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",
}

# Fields to extract from the detail page
FIELDS = ["Height", "Weight", "Breast size", "Breast factor", "Hair color",
          "Eye color", "Birth date", "Ethnicity", "Birth place"]


def fetch_data(url, retries=3):
    """Fetch the given URL, with a retry mechanism."""
    for attempt in range(retries):
        try:
            response = requests.get(url, headers=HEADERS, timeout=10)
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException as e:
            print(f"[ERROR] Request failed {url}: {e}, retry {attempt + 1}/{retries}")
            time.sleep(2)
    return None


def process_paragraph(paragraph):
    # Take the tag's full HTML rather than calling get_text() on it directly,
    # re-parse it with BeautifulSoup, and extract the cleaned plain text.
    paragraph_html = str(paragraph)
    soup = BeautifulSoup(paragraph_html, 'html.parser')
    cleaned_text = soup.get_text().strip()
    return cleaned_text


# Load already-processed data so interrupted runs can resume
def load_existing_data():
    if os.path.exists(OUTPUT_JSON):
        try:
            with open(OUTPUT_JSON, "r", encoding="utf-8") as f:
                detailed_models = json.load(f)
            existing_names = {model["en_name"] for model in detailed_models}
        except Exception as e:
            logging.error(f"Cannot read {OUTPUT_JSON}: {e}")
            detailed_models = []
            existing_names = set()
    else:
        detailed_models = []
        existing_names = set()
    return detailed_models, existing_names


def process_data():
    # Load the merged list produced by list_format.py
    try:
        with open(INPUT_FILE, "r", encoding="utf-8") as f:
            models = json.load(f)
    except Exception as e:
        logging.error(f"Cannot read {INPUT_FILE}: {e}")
        return

    detailed_models, existing_names = load_existing_data()

    # Walk every record in models.json
    for model in models:
        en_name = model.get("en_name", "")
        ja_name = model.get('ja_name', '')
        url = model.get("url", "")

        if not url or en_name in existing_names:
            logging.info(f"Skipping {en_name}: already processed or no valid URL")
            continue

        logging.info(f"Processing: {en_name} - {ja_name} - {url}")

        try:
            response = fetch_data(url, retries=100)

            if not response:
                # response is None here, so do not touch response.text
                logging.warning(f"Request failed: {url}")
                break

            soup = BeautifulSoup(response.text, "html.parser")
            info_section = soup.find("div", class_="info__features")

            if not info_section:
                logging.warning(f"info__features block not found: {url}")
                continue

            extracted_data = {field: "" for field in FIELDS}
            for li in info_section.find_all("li", class_="content-desc__list-item"):
                title_tag = li.find("strong", class_="content-desc__list-title")
                value_tag = li.find("span", class_="content-desc__list-text")
                if title_tag and value_tag:
                    title = process_paragraph(title_tag)
                    value = process_paragraph(value_tag)
                    if title in extracted_data:
                        extracted_data[title] = value

            model.update(extracted_data)
            detailed_models.append(model)

            # Rewrite the full JSON output after every model so progress is kept
            with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
                json.dump(detailed_models, f, ensure_ascii=False, indent=4)

            logging.info(f"Saved: {en_name}")

            time.sleep(3)  # throttle to avoid hitting the site too fast

        except Exception as e:
            logging.error(f"Failed to process {en_name}: {e}")


# Generate the CSV from the JSON output
def json_to_csv():
    if not os.path.exists(OUTPUT_JSON):
        print("No JSON file; skipping CSV generation")
        return

    with open(OUTPUT_JSON, "r", encoding="utf-8") as jsonfile:
        data = json.load(jsonfile)

    fieldnames = data[0].keys()
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)


if __name__ == '__main__':
    process_data()
    json_to_csv()
    print("Done!")

105  javhd/tools.py  Normal file
@@ -0,0 +1,105 @@
import json
import sqlite3
import os


home_dir = os.path.expanduser("~")
global_host_data_dir = f'{home_dir}/hostdir/scripts_data'
global_share_data_dir = f'{home_dir}/sharedata'

db_path = f"{global_share_data_dir}/shared.db"


def create_table():
    """Create the SQLite table."""
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS javhd_models (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            rank INTEGER,
            ja_name TEXT,
            zh_name TEXT,
            en_name TEXT,
            url TEXT UNIQUE,
            pic TEXT,
            height TEXT,
            weight TEXT,
            breast_size TEXT,
            breast_factor TEXT,
            hair_color TEXT,
            eye_color TEXT,
            birth_date TEXT,
            ethnicity TEXT,
            birth_place TEXT,
            created_at TEXT DEFAULT (datetime('now', 'localtime')),
            updated_at TEXT DEFAULT (datetime('now', 'localtime'))
        )
    ''')
    conn.commit()
    conn.close()


def insert_data(data):
    """Insert JSON records into the database, updating existing rows on conflict."""
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    sql = '''
        INSERT INTO javhd_models (
            rank, ja_name, zh_name, en_name, url, pic, height, weight,
            breast_size, breast_factor, hair_color, eye_color, birth_date,
            ethnicity, birth_place, updated_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime'))
        ON CONFLICT(url) DO UPDATE SET
            rank=excluded.rank,
            ja_name=excluded.ja_name,
            zh_name=excluded.zh_name,
            en_name=excluded.en_name,
            pic=excluded.pic,
            height=excluded.height,
            weight=excluded.weight,
            breast_size=excluded.breast_size,
            breast_factor=excluded.breast_factor,
            hair_color=excluded.hair_color,
            eye_color=excluded.eye_color,
            birth_date=excluded.birth_date,
            ethnicity=excluded.ethnicity,
            birth_place=excluded.birth_place,
            updated_at=datetime('now', 'localtime');
    '''

    for item in data:
        try:
            cursor.execute(sql, (
                item.get("rank"), item.get("ja_name"), item.get("zh_name"), item.get("en_name"),
                item.get("url"), item.get("pic"), item.get("Height"), item.get("Weight"),
                item.get("Breast size"), item.get("Breast factor"), item.get("Hair color"),
                item.get("Eye color"), item.get("Birth date"), item.get("Ethnicity"),
                item.get("Birth place")
            ))
        except sqlite3.Error as e:
            print(f"[ERROR] Failed to insert row: {e}")

    conn.commit()
    conn.close()


def load_json(file_path):
    """Read a JSON file and return its contents."""
    if not os.path.exists(file_path):
        print("[ERROR] JSON file does not exist!")
        return []

    with open(file_path, "r", encoding="utf-8") as f:
        try:
            data = json.load(f)
            return data
        except json.JSONDecodeError as e:
            print(f"[ERROR] Failed to parse JSON file: {e}")
            return []


if __name__ == "__main__":
    #create_table()
    json_data = load_json("./result/models_detail.json")
    if json_data:
        insert_data(json_data)
        print("[INFO] Data import complete!")