diff --git a/scripts/iafd/src/sqlite_utils.py b/scripts/iafd/src/sqlite_utils.py index fe54b78..b2dbf58 100644 --- a/scripts/iafd/src/sqlite_utils.py +++ b/scripts/iafd/src/sqlite_utils.py @@ -18,7 +18,7 @@ def get_current_time(): def insert_or_update_performer(data): try: cursor.execute(""" - INSERT INTO performers (href, name, gender, birthday, astrology, birthplace, years_active, ethnicity, nationality, hair_colors, + INSERT INTO iafd_performers (href, name, gender, birthday, astrology, birthplace, years_active, ethnicity, nationality, hair_colors, eye_color, height_str, weight_str, measurements, tattoos, piercings, weight, height, movies_cnt, vixen_cnt, blacked_cnt, tushy_cnt, x_art_cnt, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime')) @@ -54,17 +54,17 @@ def insert_or_update_performer(data): )) # 获取 performer_id - cursor.execute("SELECT id FROM performers WHERE href = ?", (data["href"],)) + cursor.execute("SELECT id FROM iafd_performers WHERE href = ?", (data["href"],)) performer_id = cursor.fetchone()[0] # 删除旧的 alias - cursor.execute("DELETE FROM performer_aliases WHERE performer_id = ?", (performer_id,)) + cursor.execute("DELETE FROM iafd_performer_aliases WHERE performer_id = ?", (performer_id,)) # 插入新的 alias #for alias in data.get("performer_aka", []): for alias in data.get("performer_aka") or []: if alias.lower() != "no known aliases": - cursor.execute("INSERT INTO performer_aliases (performer_id, alias) VALUES (?, ?) ON CONFLICT(performer_id, alias) DO NOTHING ", (performer_id, alias)) + cursor.execute("INSERT INTO iafd_performer_aliases (performer_id, alias) VALUES (?, ?) ON CONFLICT(performer_id, alias) DO NOTHING ", (performer_id, alias)) conn.commit() logging.debug(f"成功插入/更新演员: {data['person']}") @@ -83,9 +83,9 @@ def insert_or_update_performer(data): def delete_performer(identifier): try: if isinstance(identifier, int): - cursor.execute("DELETE FROM performers WHERE id = ?", (identifier,)) + cursor.execute("DELETE FROM iafd_performers WHERE id = ?", (identifier,)) elif isinstance(identifier, str): - cursor.execute("DELETE FROM performers WHERE href = ?", (identifier,)) + cursor.execute("DELETE FROM iafd_performers WHERE href = ?", (identifier,)) else: logging.warning("无效的删除参数") return @@ -100,15 +100,15 @@ def delete_performer(identifier): def query_performer(identifier): try: if isinstance(identifier, int): - cursor.execute("SELECT * FROM performers WHERE id = ?", (identifier,)) + cursor.execute("SELECT * FROM iafd_performers WHERE id = ?", (identifier,)) elif "http" in identifier: - cursor.execute("SELECT * FROM performers WHERE href = ?", (identifier,)) + cursor.execute("SELECT * FROM iafd_performers WHERE href = ?", (identifier,)) else: - cursor.execute("SELECT * FROM performers WHERE name LIKE ?", (f"%{identifier}%",)) + cursor.execute("SELECT * FROM iafd_performers WHERE name LIKE ?", (f"%{identifier}%",)) performer = cursor.fetchone() if performer: - cursor.execute("SELECT alias FROM performer_aliases WHERE performer_id = ?", (performer[0],)) + cursor.execute("SELECT alias FROM iafd_performer_aliases WHERE performer_id = ?", (performer[0],)) aliases = [row[0] for row in cursor.fetchall()] result = dict(zip([desc[0] for desc in cursor.description], performer)) result["performer_aka"] = aliases @@ -124,7 +124,7 @@ def query_performer(identifier): # 按条件查询 href 列表 def query_performer_hrefs(**filters): try: - sql = "SELECT href FROM performers WHERE 1=1" + sql = "SELECT href FROM iafd_performers WHERE 1=1" params = [] if "id" in filters: @@ -149,7 +149,7 @@ def query_performer_hrefs(**filters): def insert_or_update_distributor(data): try: cursor.execute(""" - INSERT INTO distributors (name, href, updated_at) + INSERT INTO iafd_distributors (name, href, updated_at) VALUES (?, ? , datetime('now', 'localtime')) ON CONFLICT(href) DO UPDATE SET name = excluded.name, @@ -158,7 +158,7 @@ def insert_or_update_distributor(data): conn.commit() # 获取 performer_id - cursor.execute("SELECT id FROM distributors WHERE href = ?", (data["href"],)) + cursor.execute("SELECT id FROM iafd_distributors WHERE href = ?", (data["href"],)) dist_id = cursor.fetchone()[0] if dist_id: logging.debug(f"成功插入/更新发行商: {data['name']}") @@ -174,9 +174,9 @@ def insert_or_update_distributor(data): def delete_distributor(identifier): try: if isinstance(identifier, int): - cursor.execute("DELETE FROM distributors WHERE id = ?", (identifier,)) + cursor.execute("DELETE FROM iafd_distributors WHERE id = ?", (identifier,)) elif isinstance(identifier, str): - cursor.execute("DELETE FROM distributors WHERE name = ?", (identifier,)) + cursor.execute("DELETE FROM iafd_distributors WHERE name = ?", (identifier,)) conn.commit() logging.info(f"成功删除发行商: {identifier}") except sqlite3.Error as e: @@ -187,9 +187,9 @@ def delete_distributor(identifier): def query_distributor(identifier): try: if isinstance(identifier, int): - cursor.execute("SELECT * FROM distributors WHERE id = ?", (identifier,)) + cursor.execute("SELECT * FROM iafd_distributors WHERE id = ?", (identifier,)) else: - cursor.execute("SELECT * FROM distributors WHERE name LIKE ?", (f"%{identifier}%",)) + cursor.execute("SELECT * FROM iafd_distributors WHERE name LIKE ?", (f"%{identifier}%",)) distributor = cursor.fetchone() if distributor: @@ -204,7 +204,7 @@ def query_distributor(identifier): # 按条件查询 href 列表 def query_distributor_hrefs(**filters): try: - sql = "SELECT href FROM distributors WHERE 1=1" + sql = "SELECT href FROM iafd_distributors WHERE 1=1" params = [] if "id" in filters: @@ -228,7 +228,7 @@ def query_distributor_hrefs(**filters): def insert_or_update_studio(data): try: cursor.execute(""" - INSERT INTO studios (name, href, updated_at) + INSERT INTO iafd_studios (name, href, updated_at) VALUES (?, ?, datetime('now', 'localtime')) ON CONFLICT(href) DO UPDATE SET name = excluded.name, @@ -237,7 +237,7 @@ def insert_or_update_studio(data): conn.commit() # 获取 performer_id - cursor.execute("SELECT id FROM studios WHERE href = ?", (data["href"],)) + cursor.execute("SELECT id FROM iafd_studios WHERE href = ?", (data["href"],)) stu_id = cursor.fetchone()[0] if stu_id: logging.debug(f"成功插入/更新发行商: {data['name']}") @@ -253,9 +253,9 @@ def insert_or_update_studio(data): def delete_studio(identifier): try: if isinstance(identifier, int): - cursor.execute("DELETE FROM studios WHERE id = ?", (identifier,)) + cursor.execute("DELETE FROM iafd_studios WHERE id = ?", (identifier,)) elif isinstance(identifier, str): - cursor.execute("DELETE FROM studios WHERE name = ?", (identifier,)) + cursor.execute("DELETE FROM iafd_studios WHERE name = ?", (identifier,)) conn.commit() logging.info(f"成功删除制作公司: {identifier}") except sqlite3.Error as e: @@ -266,9 +266,9 @@ def delete_studio(identifier): def query_studio(identifier): try: if isinstance(identifier, int): - cursor.execute("SELECT * FROM studios WHERE id = ?", (identifier,)) + cursor.execute("SELECT * FROM iafd_studios WHERE id = ?", (identifier,)) else: - cursor.execute("SELECT * FROM studios WHERE name LIKE ?", (f"%{identifier}%",)) + cursor.execute("SELECT * FROM iafd_studios WHERE name LIKE ?", (f"%{identifier}%",)) studio = cursor.fetchone() if studio: @@ -283,7 +283,7 @@ def query_studio(identifier): # 按条件查询 href 列表 def query_studio_hrefs(**filters): try: - sql = "SELECT href FROM studios WHERE 1=1" + sql = "SELECT href FROM iafd_studios WHERE 1=1" params = [] if "id" in filters: @@ -313,14 +313,14 @@ def get_id_by_href(table: str, href: str) -> int: def insert_or_update_movie(movie_data): try: # 获取相关 ID - distributor_id = get_id_by_href('distributors', movie_data['DistributorHref']) - studio_id = get_id_by_href('studios', movie_data['StudioHref']) - director_id = get_id_by_href('performers', movie_data['DirectorHref']) + distributor_id = get_id_by_href('iafd_distributors', movie_data['DistributorHref']) + studio_id = get_id_by_href('iafd_studios', movie_data['StudioHref']) + director_id = get_id_by_href('iafd_performers', movie_data['DirectorHref']) # 插入或更新电影信息 cursor.execute( """ - INSERT INTO movies (title, minutes, distributor_id, studio_id, release_date, added_to_IAFD_date, + INSERT INTO iafd_movies (title, minutes, distributor_id, studio_id, release_date, added_to_IAFD_date, all_girl, all_male, compilation, webscene, director_id, href, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime')) ON CONFLICT(href) DO UPDATE SET @@ -338,17 +338,17 @@ def insert_or_update_movie(movie_data): logging.debug("Movie inserted/updated: %s", movie_data['title']) # 获取插入的 movie_id - cursor.execute("SELECT id FROM movies WHERE href = ?", (movie_data['href'],)) + cursor.execute("SELECT id FROM iafd_movies WHERE href = ?", (movie_data['href'],)) movie_id = cursor.fetchone()[0] # 插入 performers_movies 关系表 for performer in movie_data.get('Performers', []): - performer_id = get_id_by_href('performers', performer['href']) + performer_id = get_id_by_href('iafd_performers', performer['href']) if performer_id: notes = '|'.join(performer['tags']) cursor.execute( """ - INSERT INTO performers_movies (performer_id, movie_id, role, notes) + INSERT INTO iafd_performers_movies (performer_id, movie_id, role, notes) VALUES (?, ?, ?, ?) ON CONFLICT(movie_id, performer_id) DO UPDATE SET notes=excluded.notes """, @@ -360,11 +360,11 @@ def insert_or_update_movie(movie_data): # 插入 movies_appers_in 表 for appears in movie_data.get("AppearsIn", []): - appears_in_id = get_id_by_href('movies', appears['href']) + appears_in_id = get_id_by_href('iafd_movies', appears['href']) if appears_in_id: appears_in_id = appears_in_id[0] cursor.execute(""" - INSERT INTO movies_appers_in (movie_id, appears_in_id, gradation, notes) + INSERT INTO iafd_movies_appers_in (movie_id, appears_in_id, gradation, notes) VALUES (?, ?, ?, ?) ON CONFLICT(movie_id, appears_in_id) DO NOTHING """, (movie_id, appears_in_id, 1, appears["title"])) @@ -383,9 +383,9 @@ def insert_or_update_movie(movie_data): def delete_movie(identifier): try: if isinstance(identifier, int): - cursor.execute("DELETE FROM movies WHERE id = ?", (identifier,)) + cursor.execute("DELETE FROM iafd_movies WHERE id = ?", (identifier,)) elif isinstance(identifier, str): - cursor.execute("DELETE FROM movies WHERE href = ?", (identifier,)) + cursor.execute("DELETE FROM iafd_movies WHERE href = ?", (identifier,)) else: logging.warning("无效的删除参数") return @@ -400,15 +400,15 @@ def delete_movie(identifier): def query_movies(identifier): try: if isinstance(identifier, int): - cursor.execute("SELECT * FROM movies WHERE id = ?", (identifier,)) + cursor.execute("SELECT * FROM iafd_movies WHERE id = ?", (identifier,)) elif "http" in identifier: - cursor.execute("SELECT * FROM movies WHERE href = ?", (identifier,)) + cursor.execute("SELECT * FROM iafd_movies WHERE href = ?", (identifier,)) else: - cursor.execute("SELECT * FROM movies WHERE title LIKE ?", (f"%{identifier}%",)) + cursor.execute("SELECT * FROM iafd_movies WHERE title LIKE ?", (f"%{identifier}%",)) movie = cursor.fetchone() if movie: - cursor.execute("SELECT * FROM performer_movie WHERE performer_id = ?", (movie[0],)) + cursor.execute("SELECT * FROM iafd_performers_movies WHERE performer_id = ?", (movie[0],)) performers = [row[0] for row in cursor.fetchall()] result = dict(zip([desc[0] for desc in cursor.description], performers)) result["performers"] = performers @@ -424,7 +424,7 @@ def query_movies(identifier): # 按条件查询 href 列表 def query_movie_hrefs(**filters): try: - sql = "SELECT href FROM movies WHERE 1=1" + sql = "SELECT href FROM iafd_movies WHERE 1=1" params = [] if "id" in filters: @@ -448,7 +448,7 @@ def query_movie_hrefs(**filters): def insert_task_log(): try: cursor.execute(""" - INSERT INTO task_log (task_status) VALUES ('Start') + INSERT INTO iafd_task_log (task_status) VALUES ('Start') """) conn.commit() return cursor.lastrowid # 获取插入的 task_id @@ -462,7 +462,7 @@ def update_task_log(task_id, **kwargs): fields = ", ".join(f"{key} = ?" for key in kwargs.keys()) params = list(kwargs.values()) + [task_id] - sql = f"UPDATE task_log SET {fields}, updated_at = datetime('now', 'localtime') WHERE task_id = ?" + sql = f"UPDATE iafd_task_log SET {fields}, updated_at = datetime('now', 'localtime') WHERE task_id = ?" cursor.execute(sql, params) conn.commit() except sqlite3.Error as e: @@ -472,16 +472,16 @@ def update_task_log(task_id, **kwargs): def finalize_task_log(task_id): try: # 获取 performers、studios 等表的最终行数 - cursor.execute("SELECT COUNT(*) FROM performers") + cursor.execute("SELECT COUNT(*) FROM iafd_performers") after_performers = cursor.fetchone()[0] - cursor.execute("SELECT COUNT(*) FROM movies") + cursor.execute("SELECT COUNT(*) FROM iafd_movies") after_movies = cursor.fetchone()[0] - cursor.execute("SELECT COUNT(*) FROM distributors") + cursor.execute("SELECT COUNT(*) FROM iafd_distributors") after_distributors = cursor.fetchone()[0] - cursor.execute("SELECT COUNT(*) FROM studios") + cursor.execute("SELECT COUNT(*) FROM iafd_studios") after_studios = cursor.fetchone()[0] # 更新 task_log diff --git a/scripts/javhd/tools.py b/scripts/javhd/tools.py new file mode 100644 index 0000000..caa54e9 --- /dev/null +++ b/scripts/javhd/tools.py @@ -0,0 +1,100 @@ +import json +import sqlite3 +import os +from datetime import datetime + +db_path = "/root/sharedata/shared.db" + +def create_table(): + """创建 SQLite 数据表""" + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + cursor.execute(''' + CREATE TABLE IF NOT EXISTS javhd_models ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + rank INTEGER, + ja_name TEXT, + zh_name TEXT, + en_name TEXT, + url TEXT UNIQUE, + pic TEXT, + height TEXT, + weight TEXT, + breast_size TEXT, + breast_factor TEXT, + hair_color TEXT, + eye_color TEXT, + birth_date TEXT, + ethnicity TEXT, + birth_place TEXT, + created_at TEXT DEFAULT (datetime('now', 'localtime')), + updated_at TEXT DEFAULT (datetime('now', 'localtime')) + ) + ''') + conn.commit() + conn.close() + +def insert_data(data): + """插入 JSON 数据到数据库,处理冲突情况""" + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + sql = ''' + INSERT INTO javhd_models ( + rank, ja_name, zh_name, en_name, url, pic, height, weight, + breast_size, breast_factor, hair_color, eye_color, birth_date, + ethnicity, birth_place, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime')) + ON CONFLICT(url) DO UPDATE SET + rank=excluded.rank, + ja_name=excluded.ja_name, + zh_name=excluded.zh_name, + en_name=excluded.en_name, + pic=excluded.pic, + height=excluded.height, + weight=excluded.weight, + breast_size=excluded.breast_size, + breast_factor=excluded.breast_factor, + hair_color=excluded.hair_color, + eye_color=excluded.eye_color, + birth_date=excluded.birth_date, + ethnicity=excluded.ethnicity, + birth_place=excluded.birth_place, + updated_at=datetime('now', 'localtime'); + ''' + + for item in data: + try: + cursor.execute(sql, ( + item.get("rank"), item.get("ja_name"), item.get("zh_name"), item.get("en_name"), + item.get("url"), item.get("pic"), item.get("Height"), item.get("Weight"), + item.get("Breast size"), item.get("Breast factor"), item.get("Hair color"), + item.get("Eye color"), item.get("Birth date"), item.get("Ethnicity"), + item.get("Birth place") + )) + except sqlite3.Error as e: + print(f"[ERROR] 插入数据时发生错误: {e}") + + conn.commit() + conn.close() + +def load_json(file_path): + """读取 JSON 文件并返回数据""" + if not os.path.exists(file_path): + print("[ERROR] JSON 文件不存在!") + return [] + + with open(file_path, "r", encoding="utf-8") as f: + try: + data = json.load(f) + return data + except json.JSONDecodeError as e: + print(f"[ERROR] 解析 JSON 文件失败: {e}") + return [] + +if __name__ == "__main__": + create_table() + json_data = load_json("./result/models_detail.json") + if json_data: + insert_data(json_data) + print("[INFO] 数据导入完成!") \ No newline at end of file diff --git a/scripts/schema.sql b/scripts/schema.sql new file mode 100644 index 0000000..6b590d6 --- /dev/null +++ b/scripts/schema.sql @@ -0,0 +1,172 @@ +CREATE TABLE sqlite_sequence(name,seq); +CREATE TABLE IF NOT EXISTS "iafd_performers" ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + gender TEXT, + birthday TEXT, + astrology TEXT, + birthplace TEXT, + years_active TEXT, + ethnicity TEXT, + nationality TEXT, + hair_colors TEXT, + eye_color TEXT, + height_str TEXT, + weight_str TEXT, + measurements TEXT, + tattoos TEXT, + piercings TEXT, + fake_tits TEXT, + href TEXT UNIQUE, + created_at TEXT DEFAULT (datetime('now', 'localtime')), + updated_at TEXT DEFAULT (datetime('now', 'localtime')), + weight INTEGER, + height INTEGER, + rating INTEGER, + movies_cnt INTEGER, + vixen_cnt INTEGER, + blacked_cnt INTEGER, + tushy_cnt INTEGER, + x_art_cnt INTEGER +); +CREATE TABLE IF NOT EXISTS "iafd_performer_aliases" ( + `performer_id` integer NOT NULL, + `alias` varchar(255) NOT NULL, + foreign key(`performer_id`) references "iafd_performers"(`id`) on delete CASCADE, + PRIMARY KEY(`performer_id`, `alias`) +); +CREATE TABLE IF NOT EXISTS "iafd_movies_appers_in" ( + `movie_id` integer, + `appears_in_id` integer, + `gradation` integer, + `notes` varchar(255), + foreign key(`movie_id`) references "iafd_movies"(`id`) on delete CASCADE, + foreign key(`appears_in_id`) references "iafd_movies"(`id`) on delete CASCADE, + PRIMARY KEY (`movie_id`, `appears_in_id`) +); +CREATE TABLE IF NOT EXISTS "iafd_performer_urls" ( + `performer_id` integer NOT NULL, + `position` varchar(255) NOT NULL, + `url` varchar(255) NOT NULL, + foreign key(`performer_id`) references "iafd_performers"(`id`) on delete CASCADE, + PRIMARY KEY(`performer_id`, `position`, `url`) +); +CREATE TABLE IF NOT EXISTS "iafd_distributors" ( + `id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + `name` VARCHAR(255) NOT NULL, + `href` VARCHAR(255) UNIQUE, + `parent_id` INTEGER DEFAULT NULL CHECK (`id` IS NOT `parent_id`) REFERENCES "iafd_distributors"(`id`) ON DELETE SET NULL, + `created_at` TEXT DEFAULT (datetime('now', 'localtime')), + `updated_at` TEXT DEFAULT (datetime('now', 'localtime')), + `details` TEXT +); +CREATE TABLE IF NOT EXISTS "iafd_studios" ( + `id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + `name` VARCHAR(255) NOT NULL, + `href` VARCHAR(255) UNIQUE, + `parent_id` INTEGER DEFAULT NULL CHECK (`id` IS NOT `parent_id`) REFERENCES "iafd_studios"(`id`) ON DELETE SET NULL, + `created_at` TEXT DEFAULT (datetime('now', 'localtime')), + `updated_at` TEXT DEFAULT (datetime('now', 'localtime')), + `details` TEXT +); +CREATE TABLE IF NOT EXISTS "iafd_performers_movies" ( + `performer_id` integer, + `movie_id` integer, + `role` varchar(255), + `notes` varchar(255), + `created_at` TEXT DEFAULT (datetime('now', 'localtime')), + foreign key(`performer_id`) references "iafd_performers"(`id`) on delete CASCADE, + foreign key(`movie_id`) references "iafd_movies"(`id`) on delete CASCADE, + PRIMARY KEY (`movie_id`, `performer_id`) +); +CREATE TABLE IF NOT EXISTS "iafd_task_log" ( + `task_id` integer not null primary key autoincrement, + `before_performers` integer, + `new_performers` integer, + `after_performers` integer, + `before_movies` integer, + `new_movies` integer, + `after_movies` integer, + `before_distributors` integer, + `new_distributors` integer, + `after_distributors` integer, + `before_studios` integer, + `new_studios` integer, + `after_studios` integer, + `task_status` varchar(255), + `created_at` TEXT DEFAULT (datetime('now', 'localtime')), + `updated_at` TEXT DEFAULT (datetime('now', 'localtime')) +); +CREATE TABLE IF NOT EXISTS "iafd_movies" ( + `id` integer not null primary key autoincrement, + `title` varchar(255), + `minutes` varchar(255), + `distributor_id` integer, + `studio_id` integer, + `release_date` varchar(255), + `added_to_IAFD_date` varchar(255), + `all_girl` varchar(255), + `all_male` varchar(255), + `compilation` varchar(255), + `webscene` varchar(255), + `director_id` integer, + `href` varchar(255) UNIQUE, + `created_at` TEXT DEFAULT (datetime('now', 'localtime')), + `updated_at` TEXT DEFAULT (datetime('now', 'localtime')), + foreign key(`studio_id`) references "iafd_studios"(`id`) on delete SET NULL, + foreign key(`distributor_id`) references "iafd_distributors"(`id`) on delete SET NULL +); +CREATE TABLE javhd_models ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + rank INTEGER, + ja_name TEXT, + zh_name TEXT, + en_name TEXT, + url TEXT UNIQUE, + pic TEXT, + height TEXT, + weight TEXT, + breast_size TEXT, + breast_factor TEXT, + hair_color TEXT, + eye_color TEXT, + birth_date TEXT, + ethnicity TEXT, + birth_place TEXT, + created_at TEXT DEFAULT (datetime('now', 'localtime')), + updated_at TEXT DEFAULT (datetime('now', 'localtime')) +); +CREATE TABLE thelordofporn_actress ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + pornstar TEXT, + rating REAL, + rank INTEGER, + votes INTEGER, + href TEXT UNIQUE, + career_start TEXT, + measurements TEXT, + born TEXT, + height TEXT, + weight TEXT, + date_modified TEXT, + global_rank INTEGER, + weekly_rank INTEGER, + last_month_rating REAL, + current_rating REAL, + total_votes INTEGER, + birth_date TEXT, + birth_year TEXT, + birth_place TEXT, + height_ft TEXT, + height_cm TEXT, + weight_lbs TEXT, + weight_kg TEXT, + created_at TEXT DEFAULT (datetime('now', 'localtime')), + updated_at TEXT DEFAULT (datetime('now', 'localtime')) + ); +CREATE TABLE thelordofporn_alias ( + actress_id INTEGER NOT NULL, + alias TEXT NOT NULL, + FOREIGN KEY (actress_id) REFERENCES thelordofporn_actress(id) ON DELETE CASCADE, + PRIMARY KEY(`actress_id`, `alias`) + ); diff --git a/scripts/thelordofporn/tools.py b/scripts/thelordofporn/tools.py new file mode 100644 index 0000000..d96a977 --- /dev/null +++ b/scripts/thelordofporn/tools.py @@ -0,0 +1,166 @@ +import sqlite3 +import json +import re +import logging +from datetime import datetime + +def setup_logging(): + logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + +db_path = "/root/sharedata/shared.db" + +def connect_db(db_name=db_path): + return sqlite3.connect(db_name) + +def create_tables(conn): + cursor = conn.cursor() + cursor.execute(''' + CREATE TABLE IF NOT EXISTS thelordofporn_actress ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + pornstar TEXT, + rating REAL, + rank INTEGER, + votes INTEGER, + href TEXT UNIQUE, + career_start TEXT, + measurements TEXT, + born TEXT, + height TEXT, + weight TEXT, + date_modified TEXT, + global_rank INTEGER, + weekly_rank INTEGER, + last_month_rating REAL, + current_rating REAL, + total_votes INTEGER, + birth_date TEXT, + birth_year TEXT, + birth_place TEXT, + height_ft TEXT, + height_cm TEXT, + weight_lbs TEXT, + weight_kg TEXT, + created_at TEXT DEFAULT (datetime('now', 'localtime')), + updated_at TEXT DEFAULT (datetime('now', 'localtime')) + ); + ''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS thelordofporn_alias ( + actress_id INTEGER NOT NULL, + alias TEXT NOT NULL, + FOREIGN KEY (actress_id) REFERENCES thelordofporn_actress(id) ON DELETE CASCADE, + PRIMARY KEY(`actress_id`, `alias`) + ); + ''') + conn.commit() + +def load_json(file_path): + try: + with open(file_path, 'r', encoding='utf-8') as f: + return json.load(f) + except (FileNotFoundError, json.JSONDecodeError) as e: + logging.error(f"Failed to load JSON file: {e}") + return [] + +def clean_alias(alias): + alias = re.sub(r'\(Age \d+\)', '', alias) # 去掉 (Age XX) + return [name.strip() for name in alias.split(',') if name.strip()] + +def parse_numeric(value): + try: + return float(value) + except (ValueError, TypeError): + return 0 # 默认值为 0 + +def insert_actress(conn, actress): + cursor = conn.cursor() + + # 插入 thelordofporn_actress 表 + cursor.execute(''' + INSERT INTO thelordofporn_actress ( + pornstar, rating, rank, votes, href, career_start, measurements, born, + height, weight, date_modified, global_rank, weekly_rank, + last_month_rating, current_rating, total_votes, + birth_date, birth_year, birth_place, height_ft, height_cm, + weight_lbs, weight_kg, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime')) + ON CONFLICT(href) DO UPDATE SET + rating=excluded.rating, + rank=excluded.rank, + votes=excluded.votes, + career_start=excluded.career_start, + measurements=excluded.measurements, + born=excluded.born, + height=excluded.height, + weight=excluded.weight, + date_modified=excluded.date_modified, + global_rank=excluded.global_rank, + weekly_rank=excluded.weekly_rank, + last_month_rating=excluded.last_month_rating, + current_rating=excluded.current_rating, + total_votes=excluded.total_votes, + birth_date=excluded.birth_date, + birth_year=excluded.birth_year, + birth_place=excluded.birth_place, + height_ft=excluded.height_ft, + height_cm=excluded.height_cm, + weight_lbs=excluded.weight_lbs, + weight_kg=excluded.weight_kg, + updated_at=datetime('now', 'localtime'); + ''', ( + actress.get('pornstar', ''), + parse_numeric(actress.get('rating', 0)), + parse_numeric(actress.get('rank', 0)), + parse_numeric(actress.get('votes', 0)), + actress.get('href', ''), + actress.get('career_start', ''), + actress.get('measurements', ''), + actress.get('born', ''), + actress.get('height', ''), + actress.get('weight', ''), + actress.get('date_modified', ''), + parse_numeric(actress.get('global_rank', 0)), + parse_numeric(actress.get('weekly_rank', 0)), + parse_numeric(actress.get('last_month_rating', 0)), + parse_numeric(actress.get('current_rating', 0)), + parse_numeric(actress.get('total_votes', 0)), + actress.get('birth_date', ''), + str(actress.get('birth_year', '')), + actress.get('birth_place', ''), + actress.get('height_ft', ''), + str(actress.get('height_cm', '')), + str(actress.get('weight_lbs', '')), + str(actress.get('weight_kg', '')) + )) + + actress_id = cursor.lastrowid if cursor.lastrowid else cursor.execute("SELECT id FROM thelordofporn_actress WHERE href = ?", (actress.get('href', ''),)).fetchone()[0] + + # 插入 thelordofporn_alias 表 + if 'alias' in actress: + aliases = clean_alias(actress['alias']) + cursor.execute("DELETE FROM thelordofporn_alias WHERE actress_id = ?", (actress_id,)) + for alias in aliases: + cursor.execute("INSERT INTO thelordofporn_alias (actress_id, alias) VALUES (?, ?) ON CONFLICT(actress_id, alias) DO NOTHING ", (actress_id, alias)) + + conn.commit() + +def main(): + setup_logging() + conn = connect_db() + create_tables(conn) + actresses = load_json("./result/actress_detail.json") + + if actresses: + for actress in actresses: + try: + insert_actress(conn, actress) + logging.info(f"Inserted/Updated: {actress.get('pornstar', 'Unknown')}") + except Exception as e: + logging.error(f"Error inserting actress: {e}") + else: + logging.warning("No data to insert.") + + conn.close() + +if __name__ == "__main__": + main()