modify some scripts.

This commit is contained in:
2025-03-04 16:05:47 +08:00
parent 0741ac94ad
commit 3f0a8acb6b
4 changed files with 485 additions and 47 deletions

View File

@ -18,7 +18,7 @@ def get_current_time():
def insert_or_update_performer(data):
try:
cursor.execute("""
INSERT INTO performers (href, name, gender, birthday, astrology, birthplace, years_active, ethnicity, nationality, hair_colors,
INSERT INTO iafd_performers (href, name, gender, birthday, astrology, birthplace, years_active, ethnicity, nationality, hair_colors,
eye_color, height_str, weight_str, measurements, tattoos, piercings, weight, height, movies_cnt, vixen_cnt,
blacked_cnt, tushy_cnt, x_art_cnt, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime'))
@ -54,17 +54,17 @@ def insert_or_update_performer(data):
))
# 获取 performer_id
cursor.execute("SELECT id FROM performers WHERE href = ?", (data["href"],))
cursor.execute("SELECT id FROM iafd_performers WHERE href = ?", (data["href"],))
performer_id = cursor.fetchone()[0]
# 删除旧的 alias
cursor.execute("DELETE FROM performer_aliases WHERE performer_id = ?", (performer_id,))
cursor.execute("DELETE FROM iafd_performer_aliases WHERE performer_id = ?", (performer_id,))
# 插入新的 alias
#for alias in data.get("performer_aka", []):
for alias in data.get("performer_aka") or []:
if alias.lower() != "no known aliases":
cursor.execute("INSERT INTO performer_aliases (performer_id, alias) VALUES (?, ?) ON CONFLICT(performer_id, alias) DO NOTHING ", (performer_id, alias))
cursor.execute("INSERT INTO iafd_performer_aliases (performer_id, alias) VALUES (?, ?) ON CONFLICT(performer_id, alias) DO NOTHING ", (performer_id, alias))
conn.commit()
logging.debug(f"成功插入/更新演员: {data['person']}")
@ -83,9 +83,9 @@ def insert_or_update_performer(data):
def delete_performer(identifier):
try:
if isinstance(identifier, int):
cursor.execute("DELETE FROM performers WHERE id = ?", (identifier,))
cursor.execute("DELETE FROM iafd_performers WHERE id = ?", (identifier,))
elif isinstance(identifier, str):
cursor.execute("DELETE FROM performers WHERE href = ?", (identifier,))
cursor.execute("DELETE FROM iafd_performers WHERE href = ?", (identifier,))
else:
logging.warning("无效的删除参数")
return
@ -100,15 +100,15 @@ def delete_performer(identifier):
def query_performer(identifier):
try:
if isinstance(identifier, int):
cursor.execute("SELECT * FROM performers WHERE id = ?", (identifier,))
cursor.execute("SELECT * FROM iafd_performers WHERE id = ?", (identifier,))
elif "http" in identifier:
cursor.execute("SELECT * FROM performers WHERE href = ?", (identifier,))
cursor.execute("SELECT * FROM iafd_performers WHERE href = ?", (identifier,))
else:
cursor.execute("SELECT * FROM performers WHERE name LIKE ?", (f"%{identifier}%",))
cursor.execute("SELECT * FROM iafd_performers WHERE name LIKE ?", (f"%{identifier}%",))
performer = cursor.fetchone()
if performer:
cursor.execute("SELECT alias FROM performer_aliases WHERE performer_id = ?", (performer[0],))
cursor.execute("SELECT alias FROM iafd_performer_aliases WHERE performer_id = ?", (performer[0],))
aliases = [row[0] for row in cursor.fetchall()]
result = dict(zip([desc[0] for desc in cursor.description], performer))
result["performer_aka"] = aliases
@ -124,7 +124,7 @@ def query_performer(identifier):
# 按条件查询 href 列表
def query_performer_hrefs(**filters):
try:
sql = "SELECT href FROM performers WHERE 1=1"
sql = "SELECT href FROM iafd_performers WHERE 1=1"
params = []
if "id" in filters:
@ -149,7 +149,7 @@ def query_performer_hrefs(**filters):
def insert_or_update_distributor(data):
try:
cursor.execute("""
INSERT INTO distributors (name, href, updated_at)
INSERT INTO iafd_distributors (name, href, updated_at)
VALUES (?, ? , datetime('now', 'localtime'))
ON CONFLICT(href) DO UPDATE SET
name = excluded.name,
@ -158,7 +158,7 @@ def insert_or_update_distributor(data):
conn.commit()
# 获取 performer_id
cursor.execute("SELECT id FROM distributors WHERE href = ?", (data["href"],))
cursor.execute("SELECT id FROM iafd_distributors WHERE href = ?", (data["href"],))
dist_id = cursor.fetchone()[0]
if dist_id:
logging.debug(f"成功插入/更新发行商: {data['name']}")
@ -174,9 +174,9 @@ def insert_or_update_distributor(data):
def delete_distributor(identifier):
try:
if isinstance(identifier, int):
cursor.execute("DELETE FROM distributors WHERE id = ?", (identifier,))
cursor.execute("DELETE FROM iafd_distributors WHERE id = ?", (identifier,))
elif isinstance(identifier, str):
cursor.execute("DELETE FROM distributors WHERE name = ?", (identifier,))
cursor.execute("DELETE FROM iafd_distributors WHERE name = ?", (identifier,))
conn.commit()
logging.info(f"成功删除发行商: {identifier}")
except sqlite3.Error as e:
@ -187,9 +187,9 @@ def delete_distributor(identifier):
def query_distributor(identifier):
try:
if isinstance(identifier, int):
cursor.execute("SELECT * FROM distributors WHERE id = ?", (identifier,))
cursor.execute("SELECT * FROM iafd_distributors WHERE id = ?", (identifier,))
else:
cursor.execute("SELECT * FROM distributors WHERE name LIKE ?", (f"%{identifier}%",))
cursor.execute("SELECT * FROM iafd_distributors WHERE name LIKE ?", (f"%{identifier}%",))
distributor = cursor.fetchone()
if distributor:
@ -204,7 +204,7 @@ def query_distributor(identifier):
# 按条件查询 href 列表
def query_distributor_hrefs(**filters):
try:
sql = "SELECT href FROM distributors WHERE 1=1"
sql = "SELECT href FROM iafd_distributors WHERE 1=1"
params = []
if "id" in filters:
@ -228,7 +228,7 @@ def query_distributor_hrefs(**filters):
def insert_or_update_studio(data):
try:
cursor.execute("""
INSERT INTO studios (name, href, updated_at)
INSERT INTO iafd_studios (name, href, updated_at)
VALUES (?, ?, datetime('now', 'localtime'))
ON CONFLICT(href) DO UPDATE SET
name = excluded.name,
@ -237,7 +237,7 @@ def insert_or_update_studio(data):
conn.commit()
# 获取 performer_id
cursor.execute("SELECT id FROM studios WHERE href = ?", (data["href"],))
cursor.execute("SELECT id FROM iafd_studios WHERE href = ?", (data["href"],))
stu_id = cursor.fetchone()[0]
if stu_id:
logging.debug(f"成功插入/更新发行商: {data['name']}")
@ -253,9 +253,9 @@ def insert_or_update_studio(data):
def delete_studio(identifier):
try:
if isinstance(identifier, int):
cursor.execute("DELETE FROM studios WHERE id = ?", (identifier,))
cursor.execute("DELETE FROM iafd_studios WHERE id = ?", (identifier,))
elif isinstance(identifier, str):
cursor.execute("DELETE FROM studios WHERE name = ?", (identifier,))
cursor.execute("DELETE FROM iafd_studios WHERE name = ?", (identifier,))
conn.commit()
logging.info(f"成功删除制作公司: {identifier}")
except sqlite3.Error as e:
@ -266,9 +266,9 @@ def delete_studio(identifier):
def query_studio(identifier):
try:
if isinstance(identifier, int):
cursor.execute("SELECT * FROM studios WHERE id = ?", (identifier,))
cursor.execute("SELECT * FROM iafd_studios WHERE id = ?", (identifier,))
else:
cursor.execute("SELECT * FROM studios WHERE name LIKE ?", (f"%{identifier}%",))
cursor.execute("SELECT * FROM iafd_studios WHERE name LIKE ?", (f"%{identifier}%",))
studio = cursor.fetchone()
if studio:
@ -283,7 +283,7 @@ def query_studio(identifier):
# 按条件查询 href 列表
def query_studio_hrefs(**filters):
try:
sql = "SELECT href FROM studios WHERE 1=1"
sql = "SELECT href FROM iafd_studios WHERE 1=1"
params = []
if "id" in filters:
@ -313,14 +313,14 @@ def get_id_by_href(table: str, href: str) -> int:
def insert_or_update_movie(movie_data):
try:
# 获取相关 ID
distributor_id = get_id_by_href('distributors', movie_data['DistributorHref'])
studio_id = get_id_by_href('studios', movie_data['StudioHref'])
director_id = get_id_by_href('performers', movie_data['DirectorHref'])
distributor_id = get_id_by_href('iafd_distributors', movie_data['DistributorHref'])
studio_id = get_id_by_href('iafd_studios', movie_data['StudioHref'])
director_id = get_id_by_href('iafd_performers', movie_data['DirectorHref'])
# 插入或更新电影信息
cursor.execute(
"""
INSERT INTO movies (title, minutes, distributor_id, studio_id, release_date, added_to_IAFD_date,
INSERT INTO iafd_movies (title, minutes, distributor_id, studio_id, release_date, added_to_IAFD_date,
all_girl, all_male, compilation, webscene, director_id, href, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime'))
ON CONFLICT(href) DO UPDATE SET
@ -338,17 +338,17 @@ def insert_or_update_movie(movie_data):
logging.debug("Movie inserted/updated: %s", movie_data['title'])
# 获取插入的 movie_id
cursor.execute("SELECT id FROM movies WHERE href = ?", (movie_data['href'],))
cursor.execute("SELECT id FROM iafd_movies WHERE href = ?", (movie_data['href'],))
movie_id = cursor.fetchone()[0]
# 插入 performers_movies 关系表
for performer in movie_data.get('Performers', []):
performer_id = get_id_by_href('performers', performer['href'])
performer_id = get_id_by_href('iafd_performers', performer['href'])
if performer_id:
notes = '|'.join(performer['tags'])
cursor.execute(
"""
INSERT INTO performers_movies (performer_id, movie_id, role, notes)
INSERT INTO iafd_performers_movies (performer_id, movie_id, role, notes)
VALUES (?, ?, ?, ?)
ON CONFLICT(movie_id, performer_id) DO UPDATE SET notes=excluded.notes
""",
@ -360,11 +360,11 @@ def insert_or_update_movie(movie_data):
# 插入 movies_appers_in 表
for appears in movie_data.get("AppearsIn", []):
appears_in_id = get_id_by_href('movies', appears['href'])
appears_in_id = get_id_by_href('iafd_movies', appears['href'])
if appears_in_id:
appears_in_id = appears_in_id[0]
cursor.execute("""
INSERT INTO movies_appers_in (movie_id, appears_in_id, gradation, notes)
INSERT INTO iafd_movies_appers_in (movie_id, appears_in_id, gradation, notes)
VALUES (?, ?, ?, ?)
ON CONFLICT(movie_id, appears_in_id) DO NOTHING
""", (movie_id, appears_in_id, 1, appears["title"]))
@ -383,9 +383,9 @@ def insert_or_update_movie(movie_data):
def delete_movie(identifier):
try:
if isinstance(identifier, int):
cursor.execute("DELETE FROM movies WHERE id = ?", (identifier,))
cursor.execute("DELETE FROM iafd_movies WHERE id = ?", (identifier,))
elif isinstance(identifier, str):
cursor.execute("DELETE FROM movies WHERE href = ?", (identifier,))
cursor.execute("DELETE FROM iafd_movies WHERE href = ?", (identifier,))
else:
logging.warning("无效的删除参数")
return
@ -400,15 +400,15 @@ def delete_movie(identifier):
def query_movies(identifier):
try:
if isinstance(identifier, int):
cursor.execute("SELECT * FROM movies WHERE id = ?", (identifier,))
cursor.execute("SELECT * FROM iafd_movies WHERE id = ?", (identifier,))
elif "http" in identifier:
cursor.execute("SELECT * FROM movies WHERE href = ?", (identifier,))
cursor.execute("SELECT * FROM iafd_movies WHERE href = ?", (identifier,))
else:
cursor.execute("SELECT * FROM movies WHERE title LIKE ?", (f"%{identifier}%",))
cursor.execute("SELECT * FROM iafd_movies WHERE title LIKE ?", (f"%{identifier}%",))
movie = cursor.fetchone()
if movie:
cursor.execute("SELECT * FROM performer_movie WHERE performer_id = ?", (movie[0],))
cursor.execute("SELECT * FROM iafd_performers_movies WHERE performer_id = ?", (movie[0],))
performers = [row[0] for row in cursor.fetchall()]
result = dict(zip([desc[0] for desc in cursor.description], performers))
result["performers"] = performers
@ -424,7 +424,7 @@ def query_movies(identifier):
# 按条件查询 href 列表
def query_movie_hrefs(**filters):
try:
sql = "SELECT href FROM movies WHERE 1=1"
sql = "SELECT href FROM iafd_movies WHERE 1=1"
params = []
if "id" in filters:
@ -448,7 +448,7 @@ def query_movie_hrefs(**filters):
def insert_task_log():
try:
cursor.execute("""
INSERT INTO task_log (task_status) VALUES ('Start')
INSERT INTO iafd_task_log (task_status) VALUES ('Start')
""")
conn.commit()
return cursor.lastrowid # 获取插入的 task_id
@ -462,7 +462,7 @@ def update_task_log(task_id, **kwargs):
fields = ", ".join(f"{key} = ?" for key in kwargs.keys())
params = list(kwargs.values()) + [task_id]
sql = f"UPDATE task_log SET {fields}, updated_at = datetime('now', 'localtime') WHERE task_id = ?"
sql = f"UPDATE iafd_task_log SET {fields}, updated_at = datetime('now', 'localtime') WHERE task_id = ?"
cursor.execute(sql, params)
conn.commit()
except sqlite3.Error as e:
@ -472,16 +472,16 @@ def update_task_log(task_id, **kwargs):
def finalize_task_log(task_id):
try:
# 获取 performers、studios 等表的最终行数
cursor.execute("SELECT COUNT(*) FROM performers")
cursor.execute("SELECT COUNT(*) FROM iafd_performers")
after_performers = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(*) FROM movies")
cursor.execute("SELECT COUNT(*) FROM iafd_movies")
after_movies = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(*) FROM distributors")
cursor.execute("SELECT COUNT(*) FROM iafd_distributors")
after_distributors = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(*) FROM studios")
cursor.execute("SELECT COUNT(*) FROM iafd_studios")
after_studios = cursor.fetchone()[0]
# 更新 task_log

100
scripts/javhd/tools.py Normal file
View File

@ -0,0 +1,100 @@
import json
import sqlite3
import os
from datetime import datetime
# Filesystem path of the SQLite database file opened by the functions below.
db_path = "/root/sharedata/shared.db"
def create_table():
    """Create the ``javhd_models`` table if it does not exist yet.

    Opens a dedicated connection to ``db_path``, runs the DDL, commits and
    always closes the connection again, even when the DDL fails.

    Raises:
        sqlite3.Error: If connecting to the database or executing the DDL fails.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS javhd_models (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                rank INTEGER,
                ja_name TEXT,
                zh_name TEXT,
                en_name TEXT,
                url TEXT UNIQUE,
                pic TEXT,
                height TEXT,
                weight TEXT,
                breast_size TEXT,
                breast_factor TEXT,
                hair_color TEXT,
                eye_color TEXT,
                birth_date TEXT,
                ethnicity TEXT,
                birth_place TEXT,
                created_at TEXT DEFAULT (datetime('now', 'localtime')),
                updated_at TEXT DEFAULT (datetime('now', 'localtime'))
            )
        ''')
        conn.commit()
    finally:
        # Release the database handle on both the success and the error path.
        conn.close()
def insert_data(data):
    """Upsert scraped model records into ``javhd_models``.

    Each item is inserted as a new row; when a row with the same ``url``
    already exists, its other columns and ``updated_at`` are overwritten
    instead. A ``sqlite3.Error`` raised for a single item is printed and the
    import continues with the next item. All successful statements are
    committed together at the end.

    Args:
        data: Iterable of dicts. The values are read with ``dict.get`` using
            the keys listed in ``source_keys`` below; missing keys are stored
            as NULL.

    Raises:
        sqlite3.Error: If connecting or committing fails.
    """
    sql = '''
        INSERT INTO javhd_models (
            rank, ja_name, zh_name, en_name, url, pic, height, weight,
            breast_size, breast_factor, hair_color, eye_color, birth_date,
            ethnicity, birth_place, updated_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime'))
        ON CONFLICT(url) DO UPDATE SET
            rank=excluded.rank,
            ja_name=excluded.ja_name,
            zh_name=excluded.zh_name,
            en_name=excluded.en_name,
            pic=excluded.pic,
            height=excluded.height,
            weight=excluded.weight,
            breast_size=excluded.breast_size,
            breast_factor=excluded.breast_factor,
            hair_color=excluded.hair_color,
            eye_color=excluded.eye_color,
            birth_date=excluded.birth_date,
            ethnicity=excluded.ethnicity,
            birth_place=excluded.birth_place,
            updated_at=datetime('now', 'localtime');
    '''
    # JSON keys in the same order as the column list of the INSERT above.
    source_keys = (
        "rank", "ja_name", "zh_name", "en_name", "url", "pic",
        "Height", "Weight", "Breast size", "Breast factor", "Hair color",
        "Eye color", "Birth date", "Ethnicity", "Birth place",
    )

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        for item in data:
            try:
                cursor.execute(sql, tuple(item.get(key) for key in source_keys))
            except sqlite3.Error as e:
                # Best effort: report the failing record and keep importing.
                print(f"[ERROR] 插入数据时发生错误: {e}")
        conn.commit()
    finally:
        # Close the handle even if an unexpected exception (for example a
        # non-dict item) aborts the loop; uncommitted work is discarded.
        conn.close()
def load_json(file_path):
    """Read a JSON file and return the parsed data.

    The file is opened directly instead of being checked with
    ``os.path.exists`` first, so there is no window in which it can vanish
    between the check and the read.

    Args:
        file_path: Path of the UTF-8 encoded JSON file.

    Returns:
        The decoded JSON value, or ``[]`` (after printing an error) when the
        file is missing, cannot be read, is not valid UTF-8 or is not valid
        JSON.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print("[ERROR] JSON 文件不存在!")
    except OSError as e:
        # Unreadable path (permissions, a directory, ...): still best effort.
        print(f"[ERROR] 无法读取 JSON 文件: {e}")
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is raised for bytes that are not valid UTF-8 and
        # is not a JSONDecodeError, so it has to be listed explicitly.
        print(f"[ERROR] 解析 JSON 文件失败: {e}")
    return []
if __name__ == "__main__":
    # Script entry point: make sure the target table exists, then import the
    # scraped model details when the JSON file yielded any records.
    create_table()
    json_data = load_json("./result/models_detail.json")
    if json_data:
        insert_data(json_data)
        print("[INFO] 数据导入完成!")

172
scripts/schema.sql Normal file
View File

@ -0,0 +1,172 @@
-- sqlite_sequence is SQLite's internal bookkeeping table for AUTOINCREMENT
-- columns. SQLite creates it automatically as soon as the first AUTOINCREMENT
-- table below is created, and names starting with "sqlite_" are reserved, so
-- the dumped statement "CREATE TABLE sqlite_sequence(name,seq)" must not be
-- executed as part of this script.
-- Performer master table: one row per performer, identified by an
-- auto-incremented id; `href` is constrained to be UNIQUE.
-- Profile attributes are stored as free-form TEXT.
CREATE TABLE IF NOT EXISTS "iafd_performers" (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
gender TEXT,
birthday TEXT,
astrology TEXT,
birthplace TEXT,
years_active TEXT,
ethnicity TEXT,
nationality TEXT,
hair_colors TEXT,
eye_color TEXT,
-- Textual height/weight; INTEGER `height`/`weight` columns exist further down
-- (presumably the parsed numeric forms -- TODO confirm against the importer).
height_str TEXT,
weight_str TEXT,
measurements TEXT,
tattoos TEXT,
piercings TEXT,
fake_tits TEXT,
href TEXT UNIQUE,
-- Both timestamps default to the local time at insertion.
created_at TEXT DEFAULT (datetime('now', 'localtime')),
updated_at TEXT DEFAULT (datetime('now', 'localtime')),
weight INTEGER,
height INTEGER,
rating INTEGER,
-- Integer counters; presumably movie counts overall and per label
-- (vixen / blacked / tushy / x-art) -- TODO confirm against the scraper.
movies_cnt INTEGER,
vixen_cnt INTEGER,
blacked_cnt INTEGER,
tushy_cnt INTEGER,
x_art_cnt INTEGER
);
-- Alternate names of a performer. The composite primary key admits each
-- (performer_id, alias) pair only once.
-- NOTE: the ON DELETE CASCADE below only takes effect on connections that
-- have enabled foreign key enforcement (PRAGMA foreign_keys = ON).
CREATE TABLE IF NOT EXISTS "iafd_performer_aliases" (
`performer_id` integer NOT NULL,
`alias` varchar(255) NOT NULL,
foreign key(`performer_id`) references "iafd_performers"(`id`) on delete CASCADE,
PRIMARY KEY(`performer_id`, `alias`)
);
-- Movie-to-movie link table: both `movie_id` and `appears_in_id` reference
-- "iafd_movies", and each (movie_id, appears_in_id) pair is stored once.
-- The table name keeps its historical spelling ("appers").
-- `gradation` and `notes` carry no constraints here; their meaning is defined
-- by the importer -- TODO confirm.
CREATE TABLE IF NOT EXISTS "iafd_movies_appers_in" (
`movie_id` integer,
`appears_in_id` integer,
`gradation` integer,
`notes` varchar(255),
foreign key(`movie_id`) references "iafd_movies"(`id`) on delete CASCADE,
foreign key(`appears_in_id`) references "iafd_movies"(`id`) on delete CASCADE,
PRIMARY KEY (`movie_id`, `appears_in_id`)
);
-- URLs attached to a performer. A row is unique per
-- (performer_id, position, url); `position` is free text whose vocabulary is
-- not defined in this schema -- TODO confirm against the writer.
CREATE TABLE IF NOT EXISTS "iafd_performer_urls" (
`performer_id` integer NOT NULL,
`position` varchar(255) NOT NULL,
`url` varchar(255) NOT NULL,
foreign key(`performer_id`) references "iafd_performers"(`id`) on delete CASCADE,
PRIMARY KEY(`performer_id`, `position`, `url`)
);
-- Distributors, unique per `href`. `parent_id` optionally points at another
-- distributor row: the CHECK forbids a row from being its own parent, and the
-- reference is reset to NULL when the parent row is deleted.
CREATE TABLE IF NOT EXISTS "iafd_distributors" (
`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
`name` VARCHAR(255) NOT NULL,
`href` VARCHAR(255) UNIQUE,
`parent_id` INTEGER DEFAULT NULL CHECK (`id` IS NOT `parent_id`) REFERENCES "iafd_distributors"(`id`) ON DELETE SET NULL,
`created_at` TEXT DEFAULT (datetime('now', 'localtime')),
`updated_at` TEXT DEFAULT (datetime('now', 'localtime')),
`details` TEXT
);
-- Studios, unique per `href`. Same layout as "iafd_distributors": `parent_id`
-- optionally references another studio row, may not equal the row's own id,
-- and is reset to NULL when the parent row is deleted.
CREATE TABLE IF NOT EXISTS "iafd_studios" (
`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
`name` VARCHAR(255) NOT NULL,
`href` VARCHAR(255) UNIQUE,
`parent_id` INTEGER DEFAULT NULL CHECK (`id` IS NOT `parent_id`) REFERENCES "iafd_studios"(`id`) ON DELETE SET NULL,
`created_at` TEXT DEFAULT (datetime('now', 'localtime')),
`updated_at` TEXT DEFAULT (datetime('now', 'localtime')),
`details` TEXT
);
-- Performer/movie association: at most one row per (movie_id, performer_id),
-- with free-text `role` and `notes`. Rows reference both parent tables with
-- ON DELETE CASCADE.
CREATE TABLE IF NOT EXISTS "iafd_performers_movies" (
`performer_id` integer,
`movie_id` integer,
`role` varchar(255),
`notes` varchar(255),
`created_at` TEXT DEFAULT (datetime('now', 'localtime')),
foreign key(`performer_id`) references "iafd_performers"(`id`) on delete CASCADE,
foreign key(`movie_id`) references "iafd_movies"(`id`) on delete CASCADE,
PRIMARY KEY (`movie_id`, `performer_id`)
);
-- One row per task run, keyed by an auto-incremented `task_id`.
-- For each of performers, movies, distributors and studios there is a
-- before_/new_/after_ integer triple (presumably row counts before the run,
-- rows added, and row counts afterwards -- TODO confirm against the task
-- runner), plus a free-text `task_status`.
CREATE TABLE IF NOT EXISTS "iafd_task_log" (
`task_id` integer not null primary key autoincrement,
`before_performers` integer,
`new_performers` integer,
`after_performers` integer,
`before_movies` integer,
`new_movies` integer,
`after_movies` integer,
`before_distributors` integer,
`new_distributors` integer,
`after_distributors` integer,
`before_studios` integer,
`new_studios` integer,
`after_studios` integer,
`task_status` varchar(255),
`created_at` TEXT DEFAULT (datetime('now', 'localtime')),
`updated_at` TEXT DEFAULT (datetime('now', 'localtime'))
);
-- Movie master table, unique per `href`. Descriptive attributes, including
-- the all_girl / all_male / compilation / webscene flags, are stored as text.
-- `studio_id` and `distributor_id` are foreign keys that are reset to NULL
-- when the referenced row is deleted.
CREATE TABLE IF NOT EXISTS "iafd_movies" (
`id` integer not null primary key autoincrement,
`title` varchar(255),
`minutes` varchar(255),
`distributor_id` integer,
`studio_id` integer,
`release_date` varchar(255),
`added_to_IAFD_date` varchar(255),
`all_girl` varchar(255),
`all_male` varchar(255),
`compilation` varchar(255),
`webscene` varchar(255),
-- NOTE(review): unlike studio_id / distributor_id, director_id has no
-- foreign key constraint declared below.
`director_id` integer,
`href` varchar(255) UNIQUE,
`created_at` TEXT DEFAULT (datetime('now', 'localtime')),
`updated_at` TEXT DEFAULT (datetime('now', 'localtime')),
foreign key(`studio_id`) references "iafd_studios"(`id`) on delete SET NULL,
foreign key(`distributor_id`) references "iafd_distributors"(`id`) on delete SET NULL
);
-- Model profiles, unique per `url`. Physical attributes are stored as text.
-- IF NOT EXISTS keeps the script re-runnable, matching the other tables in
-- this file.
CREATE TABLE IF NOT EXISTS javhd_models (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    rank INTEGER,
    ja_name TEXT,
    zh_name TEXT,
    en_name TEXT,
    url TEXT UNIQUE,
    pic TEXT,
    height TEXT,
    weight TEXT,
    breast_size TEXT,
    breast_factor TEXT,
    hair_color TEXT,
    eye_color TEXT,
    birth_date TEXT,
    ethnicity TEXT,
    birth_place TEXT,
    -- Both timestamps default to the local time at insertion.
    created_at TEXT DEFAULT (datetime('now', 'localtime')),
    updated_at TEXT DEFAULT (datetime('now', 'localtime'))
);
-- Actress profiles, unique per `href`. Ratings are REAL, ranks and vote
-- counts are INTEGER, every other attribute is stored as text.
-- IF NOT EXISTS keeps the script re-runnable, matching the other tables in
-- this file.
CREATE TABLE IF NOT EXISTS thelordofporn_actress (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    pornstar TEXT,
    rating REAL,
    rank INTEGER,
    votes INTEGER,
    href TEXT UNIQUE,
    career_start TEXT,
    measurements TEXT,
    born TEXT,
    height TEXT,
    weight TEXT,
    date_modified TEXT,
    global_rank INTEGER,
    weekly_rank INTEGER,
    last_month_rating REAL,
    current_rating REAL,
    total_votes INTEGER,
    birth_date TEXT,
    birth_year TEXT,
    birth_place TEXT,
    height_ft TEXT,
    height_cm TEXT,
    weight_lbs TEXT,
    weight_kg TEXT,
    -- Both timestamps default to the local time at insertion.
    created_at TEXT DEFAULT (datetime('now', 'localtime')),
    updated_at TEXT DEFAULT (datetime('now', 'localtime'))
);
-- Alternate names of an actress; each (actress_id, alias) pair is stored once.
-- IF NOT EXISTS keeps the script re-runnable, matching the other tables in
-- this file.
-- NOTE: the ON DELETE CASCADE below only takes effect on connections that
-- have enabled foreign key enforcement (PRAGMA foreign_keys = ON).
CREATE TABLE IF NOT EXISTS thelordofporn_alias (
    actress_id INTEGER NOT NULL,
    alias TEXT NOT NULL,
    FOREIGN KEY (actress_id) REFERENCES thelordofporn_actress(id) ON DELETE CASCADE,
    PRIMARY KEY(`actress_id`, `alias`)
);

View File

@ -0,0 +1,166 @@
import sqlite3
import json
import re
import logging
from datetime import datetime
def setup_logging():
    """Configure the root logger: INFO level, ``time - level - message`` records."""
    record_format = '%(asctime)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=record_format, level=logging.INFO)
# Default location of the SQLite database used by this script.
db_path = "/root/sharedata/shared.db"


def connect_db(db_name=db_path):
    """Open a SQLite connection.

    Args:
        db_name: Database file to open; defaults to the module-level
            ``db_path`` as it was when this function was defined.

    Returns:
        The new ``sqlite3.Connection``.
    """
    connection = sqlite3.connect(db_name)
    return connection
def create_tables(conn):
    """Create the ``thelordofporn_actress`` and ``thelordofporn_alias`` tables.

    Both statements use ``IF NOT EXISTS``, so calling this repeatedly is
    harmless. The changes are committed on *conn* before returning.

    Args:
        conn: Open ``sqlite3.Connection`` to create the tables in.
    """
    actress_ddl = '''
        CREATE TABLE IF NOT EXISTS thelordofporn_actress (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            pornstar TEXT,
            rating REAL,
            rank INTEGER,
            votes INTEGER,
            href TEXT UNIQUE,
            career_start TEXT,
            measurements TEXT,
            born TEXT,
            height TEXT,
            weight TEXT,
            date_modified TEXT,
            global_rank INTEGER,
            weekly_rank INTEGER,
            last_month_rating REAL,
            current_rating REAL,
            total_votes INTEGER,
            birth_date TEXT,
            birth_year TEXT,
            birth_place TEXT,
            height_ft TEXT,
            height_cm TEXT,
            weight_lbs TEXT,
            weight_kg TEXT,
            created_at TEXT DEFAULT (datetime('now', 'localtime')),
            updated_at TEXT DEFAULT (datetime('now', 'localtime'))
        );
    '''
    alias_ddl = '''
        CREATE TABLE IF NOT EXISTS thelordofporn_alias (
            actress_id INTEGER NOT NULL,
            alias TEXT NOT NULL,
            FOREIGN KEY (actress_id) REFERENCES thelordofporn_actress(id) ON DELETE CASCADE,
            PRIMARY KEY(`actress_id`, `alias`)
        );
    '''
    cur = conn.cursor()
    # The actress table goes first; the alias table references it.
    for statement in (actress_ddl, alias_ddl):
        cur.execute(statement)
    conn.commit()
def load_json(file_path):
    """Load a UTF-8 encoded JSON file.

    This is a best-effort loader: every failure to read or decode the file is
    logged as an error and reported to the caller as an empty list.

    Args:
        file_path: Path of the JSON file.

    Returns:
        The decoded JSON value, or ``[]`` when the file is missing, cannot be
        read, is not valid UTF-8 or is not valid JSON.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
        # OSError covers FileNotFoundError as well as unreadable paths;
        # UnicodeDecodeError (bad UTF-8 bytes) is not a JSONDecodeError and
        # would otherwise escape.
        logging.error(f"Failed to load JSON file: {e}")
        return []
def clean_alias(alias):
    """Split a comma-separated alias string into a list of clean names.

    Every ``(Age NN)`` annotation is removed first; the remainder is split on
    commas, surrounding whitespace is stripped and empty pieces are dropped.

    Args:
        alias: Raw alias string. ``None`` or an empty string yields ``[]``.

    Returns:
        List of alias names in their original order.
    """
    if not alias:
        # A missing alias (JSON null / empty string) simply means "no aliases".
        return []
    without_age = re.sub(r'\(Age \d+\)', '', alias)
    stripped = (part.strip() for part in without_age.split(','))
    return [name for name in stripped if name]
def parse_numeric(value):
    """Convert *value* to ``float``, falling back to ``0``.

    Args:
        value: Anything accepted by ``float()``.

    Returns:
        ``float(value)``, or the integer ``0`` when the conversion raises
        ``ValueError`` or ``TypeError`` (for example ``None`` or free text).
    """
    fallback = 0  # default used for unparseable input
    try:
        number = float(value)
    except (TypeError, ValueError):
        return fallback
    return number
def insert_actress(conn, actress):
    """Upsert one actress record and replace its alias list.

    The row is inserted into ``thelordofporn_actress``; when a row with the
    same ``href`` already exists, its other columns (except ``pornstar``) and
    ``updated_at`` are overwritten instead. If the record has an ``alias``
    key, the stored aliases of that actress are replaced by the cleaned list.
    Everything is committed on *conn* before returning.

    Args:
        conn: Open ``sqlite3.Connection`` containing both tables.
        actress: Dict with the scraped fields; missing keys fall back to an
            empty string (text fields) or ``0`` (numeric fields).

    Raises:
        sqlite3.Error: If any statement fails. Nothing is committed in that
            case; rolling back is left to the caller.
    """
    cursor = conn.cursor()
    href = actress.get('href', '')

    # Insert into / update the thelordofporn_actress table.
    cursor.execute('''
        INSERT INTO thelordofporn_actress (
            pornstar, rating, rank, votes, href, career_start, measurements, born,
            height, weight, date_modified, global_rank, weekly_rank,
            last_month_rating, current_rating, total_votes,
            birth_date, birth_year, birth_place, height_ft, height_cm,
            weight_lbs, weight_kg, updated_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime'))
        ON CONFLICT(href) DO UPDATE SET
            rating=excluded.rating,
            rank=excluded.rank,
            votes=excluded.votes,
            career_start=excluded.career_start,
            measurements=excluded.measurements,
            born=excluded.born,
            height=excluded.height,
            weight=excluded.weight,
            date_modified=excluded.date_modified,
            global_rank=excluded.global_rank,
            weekly_rank=excluded.weekly_rank,
            last_month_rating=excluded.last_month_rating,
            current_rating=excluded.current_rating,
            total_votes=excluded.total_votes,
            birth_date=excluded.birth_date,
            birth_year=excluded.birth_year,
            birth_place=excluded.birth_place,
            height_ft=excluded.height_ft,
            height_cm=excluded.height_cm,
            weight_lbs=excluded.weight_lbs,
            weight_kg=excluded.weight_kg,
            updated_at=datetime('now', 'localtime');
    ''', (
        actress.get('pornstar', ''),
        parse_numeric(actress.get('rating', 0)),
        parse_numeric(actress.get('rank', 0)),
        parse_numeric(actress.get('votes', 0)),
        href,
        actress.get('career_start', ''),
        actress.get('measurements', ''),
        actress.get('born', ''),
        actress.get('height', ''),
        actress.get('weight', ''),
        actress.get('date_modified', ''),
        parse_numeric(actress.get('global_rank', 0)),
        parse_numeric(actress.get('weekly_rank', 0)),
        parse_numeric(actress.get('last_month_rating', 0)),
        parse_numeric(actress.get('current_rating', 0)),
        parse_numeric(actress.get('total_votes', 0)),
        actress.get('birth_date', ''),
        str(actress.get('birth_year', '')),
        actress.get('birth_place', ''),
        actress.get('height_ft', ''),
        str(actress.get('height_cm', '')),
        str(actress.get('weight_lbs', '')),
        str(actress.get('weight_kg', '')),
    ))

    # Always resolve the id through the unique href. cursor.lastrowid cannot
    # be trusted here: when the upsert takes the UPDATE branch no row is
    # inserted, so it still reports the connection's previous insert (another
    # actress or an alias row) and the aliases would be rewritten for the
    # wrong actress.
    cursor.execute("SELECT id FROM thelordofporn_actress WHERE href = ?", (href,))
    actress_id = cursor.fetchone()[0]

    # Replace the rows in the thelordofporn_alias table.
    if 'alias' in actress:
        aliases = clean_alias(actress['alias'])
        cursor.execute("DELETE FROM thelordofporn_alias WHERE actress_id = ?", (actress_id,))
        for alias in aliases:
            cursor.execute("INSERT INTO thelordofporn_alias (actress_id, alias) VALUES (?, ?) ON CONFLICT(actress_id, alias) DO NOTHING ", (actress_id, alias))

    conn.commit()
def main():
    """Import ``./result/actress_detail.json`` into the SQLite database.

    Sets up logging, makes sure the tables exist and upserts every actress
    from the JSON file. A failure for one actress is logged, its partial
    changes are rolled back and the import continues with the next record.
    The connection is closed on every exit path.
    """
    setup_logging()
    conn = connect_db()
    try:
        create_tables(conn)
        actresses = load_json("./result/actress_detail.json")

        if not actresses:
            logging.warning("No data to insert.")
            return

        for actress in actresses:
            try:
                insert_actress(conn, actress)
                logging.info(f"Inserted/Updated: {actress.get('pornstar', 'Unknown')}")
            except Exception as e:
                # Discard the half-applied statements of the failed record so
                # the next successful commit does not persist them.
                conn.rollback()
                logging.error(f"Error inserting actress: {e}")
    finally:
        conn.close()


if __name__ == "__main__":
    main()