import sqlite3 import json import config import utils import logging from datetime import datetime # 连接 SQLite 数据库 DB_PATH = f"{config.global_share_data_dir}/shared.db" # 替换为你的数据库文件 conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() # 获取当前时间 def get_current_time(): return datetime.now().strftime("%Y-%m-%d %H:%M:%S") # 插入演员信息 def insert_or_update_performer(data): try: cursor.execute(""" INSERT INTO iafd_performers (href, name, gender, birthday, astrology, birthplace, years_active, ethnicity, nationality, hair_colors, eye_color, height_str, weight_str, measurements, tattoos, piercings, weight, height, movies_cnt, vixen_cnt, blacked_cnt, tushy_cnt, x_art_cnt, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime')) ON CONFLICT(href) DO UPDATE SET name = excluded.name, gender = excluded.gender, birthday = excluded.birthday, astrology = excluded.astrology, birthplace = excluded.birthplace, years_active = excluded.years_active, ethnicity = excluded.ethnicity, nationality = excluded.nationality, hair_colors = excluded.hair_colors, eye_color = excluded.eye_color, height_str = excluded.height_str, weight_str = excluded.weight_str, measurements = excluded.measurements, tattoos = excluded.tattoos, piercings = excluded.piercings, weight = excluded.weight, height = excluded.height, movies_cnt = excluded.movies_cnt, vixen_cnt = excluded.vixen_cnt, blacked_cnt = excluded.blacked_cnt, tushy_cnt = excluded.tushy_cnt, x_art_cnt = excluded.x_art_cnt, updated_at = datetime('now', 'localtime') """, ( data["href"], data["person"], data.get("gender"), data.get("birthday"), data.get("astrology"), data.get("birthplace"), data.get("years_active"), data.get("ethnicity"), data.get("nationality"), data.get("hair_colors"), data.get("eye_color"), data.get("height"), data.get("weight"), data.get("measurements"), data.get("tattoos"), data.get("piercings"), utils.parse_weight(data.get('weight')), utils.parse_height(data.get('height')), data.get("movies_cnt", 0), data.get("vixen_cnt", 0), data.get("blacked_cnt", 0), data.get("tushy_cnt", 0), data.get("x_art_cnt", 0) )) # 获取 performer_id cursor.execute("SELECT id FROM iafd_performers WHERE href = ?", (data["href"],)) performer_id = cursor.fetchone()[0] # 删除旧的 alias cursor.execute("DELETE FROM iafd_performer_aliases WHERE performer_id = ?", (performer_id,)) # 插入新的 alias #for alias in data.get("performer_aka", []): for alias in data.get("performer_aka") or []: if alias.lower() != "no known aliases": cursor.execute("INSERT INTO iafd_performer_aliases (performer_id, alias) VALUES (?, ?) ON CONFLICT(performer_id, alias) DO NOTHING ", (performer_id, alias)) conn.commit() logging.debug(f"成功插入/更新演员: {data['person']}") return performer_id except sqlite3.Error as e: conn.rollback() logging.error(f"数据库错误: {e}") return None except Exception as e: conn.rollback() logging.error(f"未知错误: {e}") return None # 按 id 或 href 删除演员 def delete_performer(identifier): try: if isinstance(identifier, int): cursor.execute("DELETE FROM iafd_performers WHERE id = ?", (identifier,)) elif isinstance(identifier, str): cursor.execute("DELETE FROM iafd_performers WHERE href = ?", (identifier,)) else: logging.warning("无效的删除参数") return conn.commit() logging.info(f"成功删除演员: {identifier}") except sqlite3.Error as e: conn.rollback() logging.error(f"删除失败: {e}") # 按 id、href 或 name 查询演员信息 def query_performer(identifier): try: if isinstance(identifier, int): cursor.execute("SELECT * FROM iafd_performers WHERE id = ?", (identifier,)) elif "http" in identifier: cursor.execute("SELECT * FROM iafd_performers WHERE href = ?", (identifier,)) else: cursor.execute("SELECT * FROM iafd_performers WHERE name LIKE ?", (f"%{identifier}%",)) performer = cursor.fetchone() if performer: cursor.execute("SELECT alias FROM iafd_performer_aliases WHERE performer_id = ?", (performer[0],)) aliases = [row[0] for row in cursor.fetchall()] result = dict(zip([desc[0] for desc in cursor.description], performer)) result["performer_aka"] = aliases return result else: logging.warning(f"未找到演员: {identifier}") return None except sqlite3.Error as e: logging.error(f"查询失败: {e}") return None # 按条件查询 href 列表 def query_performer_hrefs(**filters): try: sql = "SELECT href FROM iafd_performers WHERE 1=1" params = [] if "id" in filters: sql += " AND id = ?" params.append(filters["id"]) if "href" in filters: sql += " AND href = ?" params.append(filters["href"]) if "name" in filters: sql += " AND name LIKE ?" params.append(f"%{filters['name']}%") cursor.execute(sql, params) return [row[0].lower() for row in cursor.fetchall()] # 返回小写 except sqlite3.Error as e: logging.error(f"查询 href 失败: {e}") return None # 插入或更新发行商 """ def insert_or_update_distributor(data): try: cursor.execute(""" INSERT INTO iafd_distributors (name, href, updated_at) VALUES (?, ? , datetime('now', 'localtime')) ON CONFLICT(href) DO UPDATE SET name = excluded.name, updated_at = datetime('now', 'localtime') """, (data["name"], data["href"])) conn.commit() # 获取 performer_id cursor.execute("SELECT id FROM iafd_distributors WHERE href = ?", (data["href"],)) dist_id = cursor.fetchone()[0] if dist_id: logging.debug(f"成功插入/更新发行商: {data['name']}") return dist_id else: return None except sqlite3.Error as e: conn.rollback() logging.error(f"数据库错误: {e}") return None # 删除发行商(按 id 或 name) """ def delete_distributor(identifier): try: if isinstance(identifier, int): cursor.execute("DELETE FROM iafd_distributors WHERE id = ?", (identifier,)) elif isinstance(identifier, str): cursor.execute("DELETE FROM iafd_distributors WHERE name = ?", (identifier,)) conn.commit() logging.info(f"成功删除发行商: {identifier}") except sqlite3.Error as e: conn.rollback() logging.error(f"删除失败: {e}") # 查询发行商(按 id 或 name) """ def query_distributor(identifier): try: if isinstance(identifier, int): cursor.execute("SELECT * FROM iafd_distributors WHERE id = ?", (identifier,)) else: cursor.execute("SELECT * FROM iafd_distributors WHERE name LIKE ?", (f"%{identifier}%",)) distributor = cursor.fetchone() if distributor: return dict(zip([desc[0] for desc in cursor.description], distributor)) else: logging.warning(f"未找到发行商: {identifier}") return None except sqlite3.Error as e: logging.error(f"查询失败: {e}") return None # 按条件查询 href 列表 def query_distributor_hrefs(**filters): try: sql = "SELECT href FROM iafd_distributors WHERE 1=1" params = [] if "id" in filters: sql += " AND id = ?" params.append(filters["id"]) if "url" in filters: sql += " AND href = ?" params.append(filters["href"]) if "name" in filters: sql += " AND name LIKE ?" params.append(f"%{filters['name']}%") cursor.execute(sql, params) return [row[0].lower() for row in cursor.fetchall()] # 链接使用小写 except sqlite3.Error as e: logging.error(f"查询 href 失败: {e}") return None # """ 插入或更新制作公司 """ def insert_or_update_studio(data): try: cursor.execute(""" INSERT INTO iafd_studios (name, href, updated_at) VALUES (?, ?, datetime('now', 'localtime')) ON CONFLICT(href) DO UPDATE SET name = excluded.name, updated_at = datetime('now', 'localtime') """, (data["name"], data["href"])) conn.commit() # 获取 performer_id cursor.execute("SELECT id FROM iafd_studios WHERE href = ?", (data["href"],)) stu_id = cursor.fetchone()[0] if stu_id: logging.debug(f"成功插入/更新发行商: {data['name']}") return stu_id else: return None except sqlite3.Error as e: conn.rollback() logging.error(f"数据库错误: {e}") return None # """ 删除制作公司(按 id 或 name) """ def delete_studio(identifier): try: if isinstance(identifier, int): cursor.execute("DELETE FROM iafd_studios WHERE id = ?", (identifier,)) elif isinstance(identifier, str): cursor.execute("DELETE FROM iafd_studios WHERE name = ?", (identifier,)) conn.commit() logging.info(f"成功删除制作公司: {identifier}") except sqlite3.Error as e: conn.rollback() logging.error(f"删除失败: {e}") # """ 查询制作公司(按 id 或 name) """ def query_studio(identifier): try: if isinstance(identifier, int): cursor.execute("SELECT * FROM iafd_studios WHERE id = ?", (identifier,)) else: cursor.execute("SELECT * FROM iafd_studios WHERE name LIKE ?", (f"%{identifier}%",)) studio = cursor.fetchone() if studio: return dict(zip([desc[0] for desc in cursor.description], studio)) else: logging.warning(f"未找到制作公司: {identifier}") return None except sqlite3.Error as e: logging.error(f"查询失败: {e}") return None # 按条件查询 href 列表 def query_studio_hrefs(**filters): try: sql = "SELECT href FROM iafd_studios WHERE 1=1" params = [] if "id" in filters: sql += " AND id = ?" params.append(filters["id"]) if "href" in filters: sql += " AND href = ?" params.append(filters["href"]) if "name" in filters: sql += " AND name LIKE ?" params.append(f"%{filters['name']}%") cursor.execute(sql, params) return [row[0].lower() for row in cursor.fetchall()] # 链接使用小写 except sqlite3.Error as e: logging.error(f"查询 href 失败: {e}") return None # """从指定表中通过 href 查找 id""" def get_id_by_href(table: str, href: str) -> int: cursor.execute(f"SELECT id FROM {table} WHERE href = ?", (href,)) row = cursor.fetchone() return row[0] if row else None # """插入或更新电影数据""" def insert_or_update_movie(movie_data): try: # 获取相关 ID distributor_id = get_id_by_href('iafd_distributors', movie_data['DistributorHref']) studio_id = get_id_by_href('iafd_studios', movie_data['StudioHref']) director_id = get_id_by_href('iafd_performers', movie_data['DirectorHref']) # 插入或更新电影信息 cursor.execute( """ INSERT INTO iafd_movies (title, minutes, distributor_id, studio_id, release_date, added_to_IAFD_date, all_girl, all_male, compilation, webscene, director_id, href, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime')) ON CONFLICT(href) DO UPDATE SET title=excluded.title, minutes=excluded.minutes, distributor_id=excluded.distributor_id, studio_id=excluded.studio_id, release_date=excluded.release_date, added_to_IAFD_date=excluded.added_to_IAFD_date, all_girl=excluded.all_girl, all_male=excluded.all_male, compilation=excluded.compilation, webscene=excluded.webscene, director_id=excluded.director_id, updated_at = datetime('now', 'localtime') """, (movie_data['title'], movie_data['Minutes'], distributor_id, studio_id, movie_data['ReleaseDate'], movie_data['AddedtoIAFDDate'], movie_data['All-Girl'], movie_data['All-Male'], movie_data['Compilation'], movie_data['Webscene'], director_id, movie_data['href']) ) conn.commit() logging.debug("Movie inserted/updated: %s", movie_data['title']) # 获取插入的 movie_id cursor.execute("SELECT id FROM iafd_movies WHERE href = ?", (movie_data['href'],)) movie_id = cursor.fetchone()[0] # 插入 performers_movies 关系表 for performer in movie_data.get('Performers', []): performer_id = get_id_by_href('iafd_performers', performer['href']) if performer_id: notes = '|'.join(performer['tags']) cursor.execute( """ INSERT INTO iafd_performers_movies (performer_id, movie_id, role, notes) VALUES (?, ?, ?, ?) ON CONFLICT(movie_id, performer_id) DO UPDATE SET notes=excluded.notes """, (performer_id, movie_id, "Actor", notes) ) logging.debug(f"Performers {performer['href']} linked to movie: %s", movie_data['title']) else: logging.warning(f'missing performer, url {performer['href']}, in movie: ({movie_data['title']}) {movie_data['href']}') # 插入 movies_appers_in 表 for appears in movie_data.get("AppearsIn", []): appears_in_id = get_id_by_href('iafd_movies', appears['href']) if appears_in_id: appears_in_id = appears_in_id[0] cursor.execute(""" INSERT INTO iafd_movies_appers_in (movie_id, appears_in_id, gradation, notes) VALUES (?, ?, ?, ?) ON CONFLICT(movie_id, appears_in_id) DO NOTHING """, (movie_id, appears_in_id, 1, appears["title"])) else: logging.warning(f'missing AppearsIn movie in movies table, parent_url {appears['href']}, in movie: ({movie_data['title']}) {movie_data['href']}') conn.commit() return movie_id except Exception as e: conn.rollback() logging.error("Error inserting movie: %s", e) return None # 删除电影数据""" def delete_movie(identifier): try: if isinstance(identifier, int): cursor.execute("DELETE FROM iafd_movies WHERE id = ?", (identifier,)) elif isinstance(identifier, str): cursor.execute("DELETE FROM iafd_movies WHERE href = ?", (identifier,)) else: logging.warning("无效的删除参数") return conn.commit() logging.info(f"Deleted movie with {identifier}") except sqlite3.Error as e: conn.rollback() logging.error("Error deleting movie: %s", e) # 查找电影数据""" def query_movies(identifier): try: if isinstance(identifier, int): cursor.execute("SELECT * FROM iafd_movies WHERE id = ?", (identifier,)) elif "http" in identifier: cursor.execute("SELECT * FROM iafd_movies WHERE href = ?", (identifier,)) else: cursor.execute("SELECT * FROM iafd_movies WHERE title LIKE ?", (f"%{identifier}%",)) movie = cursor.fetchone() if movie: cursor.execute("SELECT * FROM iafd_performers_movies WHERE performer_id = ?", (movie[0],)) performers = [row[0] for row in cursor.fetchall()] result = dict(zip([desc[0] for desc in cursor.description], performers)) result["performers"] = performers return result else: logging.warning(f"find no data: {identifier}") return None except sqlite3.Error as e: logging.error(f"查询失败: {e}") return None # 按条件查询 href 列表 def query_movie_hrefs(**filters): try: sql = "SELECT href FROM iafd_movies WHERE 1=1" params = [] if "id" in filters: sql += " AND id = ?" params.append(filters["id"]) if "href" in filters: sql += " AND href = ?" params.append(filters["href"]) if "title" in filters: sql += " AND title LIKE ?" params.append(f"%{filters['title']}%") cursor.execute(sql, params) return [row[0].lower() for row in cursor.fetchall()] # 链接使用小写 except sqlite3.Error as e: logging.error(f"查询 href 失败: {e}") return [] # 插入一条任务日志 def insert_task_log(): try: cursor.execute(""" INSERT INTO iafd_task_log (task_status) VALUES ('Start') """) conn.commit() return cursor.lastrowid # 获取插入的 task_id except sqlite3.Error as e: logging.error(f"插入任务失败: {e}") return None # 更新任务日志的字段 def update_task_log(task_id, **kwargs): try: fields = ", ".join(f"{key} = ?" for key in kwargs.keys()) params = list(kwargs.values()) + [task_id] sql = f"UPDATE iafd_task_log SET {fields}, updated_at = datetime('now', 'localtime') WHERE task_id = ?" cursor.execute(sql, params) conn.commit() except sqlite3.Error as e: logging.error(f"更新任务 {task_id} 失败: {e}") # 任务结束,更新字段 def finalize_task_log(task_id): try: # 获取 performers、studios 等表的最终行数 cursor.execute("SELECT COUNT(*) FROM iafd_performers") after_performers = cursor.fetchone()[0] cursor.execute("SELECT COUNT(*) FROM iafd_movies") after_movies = cursor.fetchone()[0] cursor.execute("SELECT COUNT(*) FROM iafd_distributors") after_distributors = cursor.fetchone()[0] cursor.execute("SELECT COUNT(*) FROM iafd_studios") after_studios = cursor.fetchone()[0] # 更新 task_log update_task_log(task_id, after_performers=after_performers, after_movies=after_movies, after_distributors=after_distributors, after_studios=after_studios, task_status="Success") except sqlite3.Error as e: logging.error(f"任务 {task_id} 结束失败: {e}") if __name__ == "__main__": try: with open('../result/detail.json', 'r') as file: performers = json.load(file) for performer in performers: insert_or_update_performer(performer) print(query_performer("Kirsten")) #delete_performer("https://www.iafd.com/person.rme/id=ca699282-1b57-4ce7-9bcc-d7799a292e34") print(query_performer_hrefs()) except FileNotFoundError: logging.info("detail.json not found, starting fresh.")