modify some scripts.

This commit is contained in:
2025-03-04 16:05:47 +08:00
parent 0741ac94ad
commit 3f0a8acb6b
4 changed files with 485 additions and 47 deletions

View File

@ -0,0 +1,166 @@
import sqlite3
import json
import re
import logging
from datetime import datetime
def setup_logging():
    """Configure the root logger: INFO level, "time - level - message" lines."""
    line_format = '%(asctime)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=line_format, level=logging.INFO)
# Default location of the shared SQLite database file.
db_path = "/root/sharedata/shared.db"


def connect_db(db_name=db_path):
    """Open and return a SQLite connection.

    Args:
        db_name: Database path handed to ``sqlite3.connect``. Defaults to the
            value ``db_path`` had when this function was defined.

    Returns:
        The newly opened ``sqlite3.Connection``.
    """
    connection = sqlite3.connect(db_name)
    return connection
def create_tables(conn):
    """Create the actress and alias tables when they are missing, then commit.

    Both statements use ``CREATE TABLE IF NOT EXISTS``, so calling this on an
    already-initialised database is harmless.

    Args:
        conn: Open SQLite connection on which the DDL is executed.
    """
    # One row per actress; ``href`` is UNIQUE and serves as the upsert key.
    actress_ddl = '''
CREATE TABLE IF NOT EXISTS thelordofporn_actress (
id INTEGER PRIMARY KEY AUTOINCREMENT,
pornstar TEXT,
rating REAL,
rank INTEGER,
votes INTEGER,
href TEXT UNIQUE,
career_start TEXT,
measurements TEXT,
born TEXT,
height TEXT,
weight TEXT,
date_modified TEXT,
global_rank INTEGER,
weekly_rank INTEGER,
last_month_rating REAL,
current_rating REAL,
total_votes INTEGER,
birth_date TEXT,
birth_year TEXT,
birth_place TEXT,
height_ft TEXT,
height_cm TEXT,
weight_lbs TEXT,
weight_kg TEXT,
created_at TEXT DEFAULT (datetime('now', 'localtime')),
updated_at TEXT DEFAULT (datetime('now', 'localtime'))
);
'''
    # One row per (actress, alias) pair, keyed on that pair.
    alias_ddl = '''
CREATE TABLE IF NOT EXISTS thelordofporn_alias (
actress_id INTEGER NOT NULL,
alias TEXT NOT NULL,
FOREIGN KEY (actress_id) REFERENCES thelordofporn_actress(id) ON DELETE CASCADE,
PRIMARY KEY(`actress_id`, `alias`)
);
'''
    cur = conn.cursor()
    for statement in (actress_ddl, alias_ddl):
        cur.execute(statement)
    conn.commit()
def load_json(file_path):
    """Read and parse a UTF-8 JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON value, or an empty list when the file is missing or
        is not valid JSON (the failure is logged at ERROR level).
    """
    parsed = []
    try:
        with open(file_path, mode='r', encoding='utf-8') as source:
            parsed = json.load(source)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        logging.error(f"Failed to load JSON file: {e}")
    return parsed
def clean_alias(alias):
    """Split a comma-separated alias string into a list of clean names.

    Args:
        alias: Raw alias text, e.g. ``"Foo (Age 30), Bar"``.

    Returns:
        The non-empty names in their original order, each stripped of
        surrounding whitespace.
    """
    # Drop every "(Age NN)" annotation before splitting on commas.
    without_age = re.sub(r'\(Age \d+\)', '', alias)
    names = []
    for piece in without_age.split(','):
        trimmed = piece.strip()
        if trimmed:
            names.append(trimmed)
    return names
def parse_numeric(value):
    """Convert ``value`` to ``float``, falling back to ``0``.

    Args:
        value: Anything ``float()`` might accept (number, numeric string, ...).

    Returns:
        ``float(value)`` on success; the integer ``0`` when the conversion
        raises ``ValueError`` or ``TypeError``.
    """
    try:
        number = float(value)
    except (ValueError, TypeError):
        # Unparseable input: use 0 as the default value.
        return 0
    return number
def insert_actress(conn, actress):
    """Upsert one actress record and, if present, replace her alias list.

    The row is keyed on ``href``: a new ``href`` inserts a row, an existing
    one updates every column listed in the ``DO UPDATE SET`` clause (note that
    ``pornstar`` is not in that list, so the stored name is left as-is on
    update). When ``actress`` contains an ``alias`` key, all existing aliases
    for the row are deleted and the cleaned list is inserted.

    The whole operation runs inside ``with conn``, so it is committed on
    success and rolled back if anything raises; a failed record therefore
    leaves no half-written state behind for a later commit to pick up.

    Args:
        conn: Open SQLite connection.
        actress: Mapping with the scraped fields; missing text fields default
            to ``''`` and missing numeric fields to ``0``.

    Raises:
        Exception: Whatever SQLite or the value helpers raise is propagated
            after the rollback.
    """
    href = actress.get('href', '')
    upsert_sql = '''
INSERT INTO thelordofporn_actress (
pornstar, rating, rank, votes, href, career_start, measurements, born,
height, weight, date_modified, global_rank, weekly_rank,
last_month_rating, current_rating, total_votes,
birth_date, birth_year, birth_place, height_ft, height_cm,
weight_lbs, weight_kg, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime'))
ON CONFLICT(href) DO UPDATE SET
rating=excluded.rating,
rank=excluded.rank,
votes=excluded.votes,
career_start=excluded.career_start,
measurements=excluded.measurements,
born=excluded.born,
height=excluded.height,
weight=excluded.weight,
date_modified=excluded.date_modified,
global_rank=excluded.global_rank,
weekly_rank=excluded.weekly_rank,
last_month_rating=excluded.last_month_rating,
current_rating=excluded.current_rating,
total_votes=excluded.total_votes,
birth_date=excluded.birth_date,
birth_year=excluded.birth_year,
birth_place=excluded.birth_place,
height_ft=excluded.height_ft,
height_cm=excluded.height_cm,
weight_lbs=excluded.weight_lbs,
weight_kg=excluded.weight_kg,
updated_at=datetime('now', 'localtime');
'''
    # Values in the same order as the column list above.
    params = (
        actress.get('pornstar', ''),
        parse_numeric(actress.get('rating', 0)),
        parse_numeric(actress.get('rank', 0)),
        parse_numeric(actress.get('votes', 0)),
        href,
        actress.get('career_start', ''),
        actress.get('measurements', ''),
        actress.get('born', ''),
        actress.get('height', ''),
        actress.get('weight', ''),
        actress.get('date_modified', ''),
        parse_numeric(actress.get('global_rank', 0)),
        parse_numeric(actress.get('weekly_rank', 0)),
        parse_numeric(actress.get('last_month_rating', 0)),
        parse_numeric(actress.get('current_rating', 0)),
        parse_numeric(actress.get('total_votes', 0)),
        actress.get('birth_date', ''),
        str(actress.get('birth_year', '')),
        actress.get('birth_place', ''),
        actress.get('height_ft', ''),
        str(actress.get('height_cm', '')),
        str(actress.get('weight_lbs', '')),
        str(actress.get('weight_kg', '')),
    )

    # Commit on success, roll back on any exception.
    with conn:
        cursor = conn.cursor()
        # Insert into / update the thelordofporn_actress table.
        cursor.execute(upsert_sql, params)

        # Always resolve the id by href. cursor.lastrowid is not refreshed
        # when the upsert takes the DO UPDATE branch, so it may still hold the
        # rowid of an earlier insert (possibly an alias row) and would point
        # the alias rewrite below at the wrong actress.
        actress_id = cursor.execute(
            "SELECT id FROM thelordofporn_actress WHERE href = ?",
            (href,),
        ).fetchone()[0]

        # Replace the rows in the thelordofporn_alias table.
        if 'alias' in actress:
            aliases = clean_alias(actress['alias'])
            cursor.execute("DELETE FROM thelordofporn_alias WHERE actress_id = ?", (actress_id,))
            for alias in aliases:
                cursor.execute("INSERT INTO thelordofporn_alias (actress_id, alias) VALUES (?, ?) ON CONFLICT(actress_id, alias) DO NOTHING ", (actress_id, alias))
def main():
    """Load the scraped actress JSON and upsert every record into SQLite.

    Sets up logging, opens the database, makes sure the tables exist, then
    processes ``./result/actress_detail.json`` record by record. A failure on
    one record is logged and does not stop the remaining records. The
    connection is closed on every exit path, including when table creation or
    loading raises.
    """
    setup_logging()
    conn = connect_db()
    try:
        create_tables(conn)
        actresses = load_json("./result/actress_detail.json")
        if not actresses:
            logging.warning("No data to insert.")
            return
        for actress in actresses:
            try:
                insert_actress(conn, actress)
                logging.info(f"Inserted/Updated: {actress.get('pornstar', 'Unknown')}")
            except Exception as e:
                # Per-record boundary: log and continue with the next record.
                logging.error(f"Error inserting actress: {e}")
    finally:
        conn.close()


# Run the import only when executed as a script, not when imported.
if __name__ == "__main__":
    main()