modify scripts

This commit is contained in:
oscarz
2025-03-17 11:30:35 +08:00
parent e6327fbe73
commit d5dc76b87f
178 changed files with 44 additions and 184447 deletions

@@ -0,0 +1,225 @@
"""
Script Name:
Description: 从 thelordofporn.com 上获取女优列表,并逐个获取女优详细信息。
由于网站使用了cloudflare, 无法直接爬取,使用 cloudscraper 绕过限制。
list_fetch.py 从网站上获取列表, 并以json的形式把结果输出到本地文件, 同时生成csv文件;
actress_fetch.py 则把上一步获取到的列表,读取详情页面,合并进来一些详细信息。
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import json
import csv
import os
import re
import time
import random
import cloudscraper
from bs4 import BeautifulSoup
import config
# File paths
DIR_RES = config.global_host_data_dir
ACTRESSES_FILE = f"{DIR_RES}/actresses.json"
DETAILS_JSON_FILE = f"{DIR_RES}/thelordofporn_pornstars.json"
DETAILS_CSV_FILE = f"{DIR_RES}/thelordofporn_pornstars.csv"
# Request headers and cookies (to mimic a real browser)
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
"Accept-Language": "en-US,en;q=0.9",
}
COOKIES = {
"cf_clearance": "your_clearance_token_here" # 需要根据 Cloudflare 的验证情况更新
}
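# Note: the cf_clearance value above is a placeholder. A working token can usually
# be copied from a real browser session on the same IP/User-Agent (DevTools ->
# Application -> Cookies); it expires periodically and then needs refreshing.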
# Parse birth date and place
def parse_birth_info(text):
match = re.match(r"(.+?) (\d{1,2}), (\d{4}) in (.+)", text)
if match:
return {
"birth_date": f"{match.group(1)} {match.group(2)}, {match.group(3)}",
"birth_year": match.group(3),
"birth_place": match.group(4),
}
return {"birth_date": text, "birth_year": "", "birth_place": ""}
# Parse height
def parse_height(text):
match = re.match(r"(\d+)\s*ft\s*(\d*)\s*in\s*\((\d+)\s*cm\)", text)
if match:
height_ft = f"{match.group(1)}'{match.group(2)}\""
return {"height_ft": height_ft.strip(), "height_cm": match.group(3)}
return {"height_ft": text, "height_cm": ""}
# Parse weight
def parse_weight(text):
match = re.match(r"(\d+)\s*lbs\s*\((\d+)\s*kg\)", text)
if match:
return {"weight_lbs": match.group(1), "weight_kg": match.group(2)}
return {"weight_lbs": text, "weight_kg": ""}
# Parse the detail page
def parse_page(actress, html):
soup = BeautifulSoup(html, "html.parser")
    # Make sure the page has the expected structure
if not soup.find("main", {"id": "content", "class": "site-content"}):
return None
    # Extract the basic info
entry_header = soup.find("header", class_="entry-header")
name_el = entry_header.find("h1", class_="entry-title") if entry_header else None
name = name_el.text.strip() if name_el else ""
date_modified_el = soup.find("time", itemprop="dateModified")
if date_modified_el:
date_modified = date_modified_el.get("content", "").strip()
else:
date_modified = ""
    # Extract metadata
global_rank = ""
weekly_rank = ""
last_month_rating = ""
current_rating = ""
total_votes = ""
    for div in (entry_header.find_all("div", class_="porn-star-rank__item") if entry_header else []):
text = div.text.strip()
if "Global Rank" in text:
global_rank = div.find("b").text.strip()
elif "Weekly Rank" in text:
weekly_rank = div.find("b").text.strip()
for item in soup.find_all("div", class_="specifications__item--horizontal"):
text = item.text.strip()
if "Last Month" in text:
last_month_rating = item.find("b").text.strip()
elif "Rating Av." in text:
current_rating = item.find("b").text.strip()
elif "Total of" in text:
total_votes = item.find("b").text.strip()
    # Parse the detailed attributes
attributes = {}
for row in soup.find_all("div", class_="specifications-grid-row"):
items = row.find_all("div", class_="specifications-grid-item")
if len(items) == 2:
label = items[0].find("h5").text.strip()
value = items[0].find("span").text.strip()
attributes[label] = value
label2 = items[1].find("h5").text.strip()
value2 = items[1].find("span").text.strip()
attributes[label2] = value2
    # Parse birth info, height, weight, etc.
birth_info = parse_birth_info(attributes.get("Born", ""))
height_info = parse_height(attributes.get("Height", ""))
weight_info = parse_weight(attributes.get("Weight", ""))
return {
"pornstar": actress['pornstar'],
"rating": actress['rating'],
"rank": actress['rank'],
"votes": actress['votes'],
"href": actress['href'],
'name': name,
"alias": attributes.get("Name", ""),
"career_start": attributes.get("Career start", ""),
"measurements": attributes.get("Measurements", ""),
"born": attributes.get("Born", ""),
"height": attributes.get("Height", ""),
"weight": attributes.get("Weight", ""),
"date_modified": date_modified,
"global_rank": global_rank,
"weekly_rank": weekly_rank,
"last_month_rating": last_month_rating,
"current_rating": current_rating,
"total_votes": total_votes,
**birth_info,
**height_info,
**weight_info,
}
# Load data that has already been processed
def load_existing_data():
if os.path.exists(DETAILS_JSON_FILE):
with open(DETAILS_JSON_FILE, "r", encoding="utf-8") as f:
return {item["pornstar"]: item for item in json.load(f)}
return {}
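# The returned dict is keyed by actress name, e.g. {"Jane Doe": {...record...}}
# (hypothetical name), which lets process_data() skip entries fetched in a
# previous run and effectively resume where it left off.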
# Fetch a page
def fetch_page(url):
scraper = cloudscraper.create_scraper()
    for _ in range(5):  # retry at most 5 times
try:
response = scraper.get(url, headers=HEADERS, cookies=COOKIES, timeout=10)
if response.status_code == 200 and "specifications-grid-row" in response.text:
return response.text
        except Exception as e:
            print(f"Request for {url} failed: {e}")
        time.sleep(random.uniform(2, 5))  # random delay
return None
# Process the data and save it
def process_data():
with open(ACTRESSES_FILE, "r", encoding="utf-8") as f:
actresses = json.load(f)
existing_data = load_existing_data()
updated_data = list(existing_data.values())
for actress in actresses:
name, url = actress["pornstar"], actress["href"]
        if name in existing_data:
            print(f"Skipping already processed entry: {name}")
            continue
        print(f"Processing: {name} - {url}")
html = fetch_page(url)
        if not html:
            print(f"Could not fetch page: {url}")
continue
details = parse_page(actress, html)
if details:
updated_data.append(details)
existing_data[name] = details
with open(DETAILS_JSON_FILE, "w", encoding="utf-8") as jsonfile:
json.dump(updated_data, jsonfile, indent=4, ensure_ascii=False)
# Generate the CSV from the JSON file
def json_to_csv():
    if not os.path.exists(DETAILS_JSON_FILE):
        print("No JSON file found, skipping CSV generation")
        return
    with open(DETAILS_JSON_FILE, "r", encoding="utf-8") as jsonfile:
        data = json.load(jsonfile)
    if not data:
        print("JSON file is empty, skipping CSV generation")
        return
    fieldnames = data[0].keys()
with open(DETAILS_CSV_FILE, "w", newline="", encoding="utf-8") as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(data)
if __name__ == '__main__':
    # Make sure the output directory exists
os.makedirs(DIR_RES, exist_ok=True)
process_data()
json_to_csv()
print("数据处理完成!")

thelordofporn/config.py
@@ -0,0 +1,27 @@
import logging
import os
import inspect
from datetime import datetime
# Directory mapped from the host machine
home_dir = os.path.expanduser("~")
global_host_data_dir = f'{home_dir}/hostdir/scripts_data/thelordofporn'
# Configure logging
def setup_logging(log_filename=None):
    # If no log_filename is given, use the calling script's name as the log file name
if log_filename is None:
        # Get the file name of the script that called setup_logging
caller_frame = inspect.stack()[1]
caller_filename = os.path.splitext(os.path.basename(caller_frame.filename))[0]
        # Current date, formatted as yyyymmdd
current_date = datetime.now().strftime('%Y%m%d')
        # Build the log file name, with the date inserted before the extension
log_filename = f'./log/{caller_filename}_{current_date}.log'
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] (%(funcName)s) - %(message)s',
handlers=[
logging.FileHandler(log_filename),
logging.StreamHandler()
])
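# Example usage (a sketch; note that logging.FileHandler does not create missing
# directories, so ./log must already exist relative to the working directory):
#   import config
#   config.setup_logging()            # -> ./log/<script>_<yyyymmdd>.log plus console output
#   config.setup_logging("run.log")   # or pass an explicit file name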

thelordofporn/list_fetch.py
@@ -0,0 +1,138 @@
"""
Script Name:
Description: 从 thelordofporn.com 上获取女优列表,并逐个获取女优详细信息。
由于网站使用了cloudflare, 无法直接爬取,使用 cloudscraper 绕过限制。
list_fetch.py 从网站上获取列表, 并以json的形式把结果输出到本地文件, 同时生成csv文件;
actress_fetch.py 则把上一步获取到的列表,读取详情页面,合并进来一些详细信息。
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import time
import json
import csv
import os
import random
import cloudscraper
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import config
DIR_RES = config.global_host_data_dir
ACTRESSES_JSON = f"{DIR_RES}/actresses.json"
ACTRESSES_CSV = f"{DIR_RES}/actresses.csv"
# Target URL
BASE_URL = "https://thelordofporn.com/pornstars/"
# Pretend to be a real browser
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
"Referer": "https://thelordofporn.com/",
}
# Collected records
actress_list = []
# Create a CloudScraper instance to get past Cloudflare
scraper = cloudscraper.create_scraper(
browser={"browser": "chrome", "platform": "windows", "mobile": False}
)
# Scrape a page (pagination supported)
def scrape_page(url):
print(f"[INFO] 正在抓取: {url}")
# 网络访问失败时自动重试
for attempt in range(3):
try:
response = scraper.get(url, headers=HEADERS, timeout=10)
            response.raise_for_status()  # check the HTTP status code
            # Check whether the server returned a valid page
soup = BeautifulSoup(response.text, "html.parser")
main_tag = soup.find("main", class_="site-content")
            if main_tag:
                break  # the page looks complete, go on to parsing
            else:
                print(f"[WARNING] Server returned an incomplete page, retrying ({attempt+1}/3)")
                time.sleep(random.uniform(2, 5))  # sleep 2-5 seconds before retrying
        except Exception as e:
            print(f"[ERROR] Request failed ({attempt+1}/3): {e}")
            time.sleep(random.uniform(2, 5))  # sleep 2-5 seconds before retrying
    else:
        print("[ERROR] Still failing after several attempts, skipping this page")
        return None
    # Parse the actress entries
articles = soup.find_all("article", class_="loop-item")
for article in articles:
try:
            # Actress name and detail link
title_tag = article.find("h3", class_="loop-item__title").find("a")
title = title_tag.text.strip()
href = title_tag["href"]
            # Rating
rating_tag = article.find("div", class_="loop-item__rating")
rating = rating_tag.text.strip() if rating_tag else "N/A"
            # Rank and votes
meta_tags = article.find("div", class_="loop-item__rank").find_all("span")
rank = meta_tags[0].find("b").text.strip() if meta_tags else "N/A"
votes = meta_tags[1].find("b").text.strip() if len(meta_tags) > 1 else "N/A"
            # Append to the results list
actress_list.append({
"pornstar": title,
"rating": rating,
"rank": rank,
"votes": votes,
"href": href
})
print(f"-----[INFO] 获取演员: {title} (Rank: {rank}, Votes: {votes}, Rating: {rating})-----")
except Exception as e:
print(f"[ERROR] 解析演员信息失败: {e}")
    # Look for the next-page link
next_page_tag = soup.select_one(".nav-links .next.page-numbers")
if next_page_tag:
next_page_url = urljoin(BASE_URL, next_page_tag["href"])
print(f"[INFO] 发现下一页: {next_page_url}")
time.sleep(random.uniform(1, 3)) # 休眠 1-3 秒,避免被封
scrape_page(next_page_url)
else:
print("[INFO] 已抓取所有页面,爬取结束")
# Save the results
def save_data():
    # Make sure the output directory exists
    os.makedirs(DIR_RES, exist_ok=True)
    # Save as JSON
    with open(ACTRESSES_JSON, "w", encoding="utf-8") as json_file:
        json.dump(actress_list, json_file, ensure_ascii=False, indent=4)
    print(f"[INFO] Data saved to {ACTRESSES_JSON}")
    # Save as CSV
with open(ACTRESSES_CSV, "w", encoding="utf-8", newline="") as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=["pornstar", "rating", "rank", "votes", "href"])
writer.writeheader()
writer.writerows(actress_list)
print(f"[INFO] 数据已保存到 {ACTRESSES_CSV}")
if __name__ == '__main__':
scrape_page(BASE_URL)
save_data()

thelordofporn/tools.py
@@ -0,0 +1,166 @@
import sqlite3
import json
import re
import logging
from datetime import datetime
def setup_logging():
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
db_path = "/root/sharedata/shared.db"
def connect_db(db_name=db_path):
return sqlite3.connect(db_name)
def create_tables(conn):
cursor = conn.cursor()
cursor.execute('''
CREATE TABLE IF NOT EXISTS thelordofporn_actress (
id INTEGER PRIMARY KEY AUTOINCREMENT,
pornstar TEXT,
rating REAL,
rank INTEGER,
votes INTEGER,
href TEXT UNIQUE,
career_start TEXT,
measurements TEXT,
born TEXT,
height TEXT,
weight TEXT,
date_modified TEXT,
global_rank INTEGER,
weekly_rank INTEGER,
last_month_rating REAL,
current_rating REAL,
total_votes INTEGER,
birth_date TEXT,
birth_year TEXT,
birth_place TEXT,
height_ft TEXT,
height_cm TEXT,
weight_lbs TEXT,
weight_kg TEXT,
created_at TEXT DEFAULT (datetime('now', 'localtime')),
updated_at TEXT DEFAULT (datetime('now', 'localtime'))
);
''')
cursor.execute('''
CREATE TABLE IF NOT EXISTS thelordofporn_alias (
actress_id INTEGER NOT NULL,
alias TEXT NOT NULL,
FOREIGN KEY (actress_id) REFERENCES thelordofporn_actress(id) ON DELETE CASCADE,
PRIMARY KEY(`actress_id`, `alias`)
);
''')
conn.commit()
def load_json(file_path):
try:
with open(file_path, 'r', encoding='utf-8') as f:
return json.load(f)
except (FileNotFoundError, json.JSONDecodeError) as e:
logging.error(f"Failed to load JSON file: {e}")
return []
def clean_alias(alias):
    alias = re.sub(r'\(Age \d+\)', '', alias)  # strip "(Age XX)" suffixes
return [name.strip() for name in alias.split(',') if name.strip()]
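# Example (hypothetical input):
#   clean_alias("Jane Doe (Age 30), J. Doe")  -> ["Jane Doe", "J. Doe"]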
def parse_numeric(value):
try:
return float(value)
except (ValueError, TypeError):
        return 0  # default to 0 when the value is not numeric
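# Example: parse_numeric("8.42") -> 8.42, parse_numeric("N/A") -> 0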
def insert_actress(conn, actress):
cursor = conn.cursor()
    # Upsert into the thelordofporn_actress table
cursor.execute('''
INSERT INTO thelordofporn_actress (
pornstar, rating, rank, votes, href, career_start, measurements, born,
height, weight, date_modified, global_rank, weekly_rank,
last_month_rating, current_rating, total_votes,
birth_date, birth_year, birth_place, height_ft, height_cm,
weight_lbs, weight_kg, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now', 'localtime'))
ON CONFLICT(href) DO UPDATE SET
rating=excluded.rating,
rank=excluded.rank,
votes=excluded.votes,
career_start=excluded.career_start,
measurements=excluded.measurements,
born=excluded.born,
height=excluded.height,
weight=excluded.weight,
date_modified=excluded.date_modified,
global_rank=excluded.global_rank,
weekly_rank=excluded.weekly_rank,
last_month_rating=excluded.last_month_rating,
current_rating=excluded.current_rating,
total_votes=excluded.total_votes,
birth_date=excluded.birth_date,
birth_year=excluded.birth_year,
birth_place=excluded.birth_place,
height_ft=excluded.height_ft,
height_cm=excluded.height_cm,
weight_lbs=excluded.weight_lbs,
weight_kg=excluded.weight_kg,
updated_at=datetime('now', 'localtime');
''', (
actress.get('pornstar', ''),
parse_numeric(actress.get('rating', 0)),
parse_numeric(actress.get('rank', 0)),
parse_numeric(actress.get('votes', 0)),
actress.get('href', ''),
actress.get('career_start', ''),
actress.get('measurements', ''),
actress.get('born', ''),
actress.get('height', ''),
actress.get('weight', ''),
actress.get('date_modified', ''),
parse_numeric(actress.get('global_rank', 0)),
parse_numeric(actress.get('weekly_rank', 0)),
parse_numeric(actress.get('last_month_rating', 0)),
parse_numeric(actress.get('current_rating', 0)),
parse_numeric(actress.get('total_votes', 0)),
actress.get('birth_date', ''),
str(actress.get('birth_year', '')),
actress.get('birth_place', ''),
actress.get('height_ft', ''),
str(actress.get('height_cm', '')),
str(actress.get('weight_lbs', '')),
str(actress.get('weight_kg', ''))
))
    # cursor.lastrowid is unreliable after an upsert that takes the UPDATE branch,
    # so resolve the id through the unique href instead
    row = cursor.execute("SELECT id FROM thelordofporn_actress WHERE href = ?", (actress.get('href', ''),)).fetchone()
    actress_id = row[0] if row else cursor.lastrowid
    # Rebuild the thelordofporn_alias rows for this actress
if 'alias' in actress:
aliases = clean_alias(actress['alias'])
cursor.execute("DELETE FROM thelordofporn_alias WHERE actress_id = ?", (actress_id,))
for alias in aliases:
cursor.execute("INSERT INTO thelordofporn_alias (actress_id, alias) VALUES (?, ?) ON CONFLICT(actress_id, alias) DO NOTHING ", (actress_id, alias))
conn.commit()
def main():
setup_logging()
conn = connect_db()
#create_tables(conn)
actresses = load_json("./result/actress_detail.json")
if actresses:
for actress in actresses:
try:
insert_actress(conn, actress)
logging.info(f"Inserted/Updated: {actress.get('pornstar', 'Unknown')}")
except Exception as e:
logging.error(f"Error inserting actress: {e}")
else:
logging.warning("No data to insert.")
conn.close()
if __name__ == "__main__":
main()

thelordofporn/top_scenes.py
@@ -0,0 +1,205 @@
import requests
from bs4 import BeautifulSoup
import os
import sys
import random
import time
import re
import logging
import csv
from datetime import datetime
from datetime import date
import config  # logging setup
import cloudscraper
# Logging
config.setup_logging()
httpx_logger = logging.getLogger("httpx")
httpx_logger.setLevel(logging.DEBUG)
# Base URL and output file configuration
base_url = 'https://thelordofporn.com/'
list_url_scenes = 'https://thelordofporn.com/category/top-10/porn-scenes-movies/'
list_url_pornstars = 'https://thelordofporn.com/category/pornstars-top-10/'
res_dir = 'result'
top_scenes_file = f'{res_dir}/top_scenes_list.csv'
top_pornstars_file = f'{res_dir}/top_pornstars_list.csv'
# Request headers and cookies (to mimic a real browser)
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
"Accept-Language": "en-US,en;q=0.9",
}
COOKIES = {
"cf_clearance": "your_clearance_token_here" # 需要根据 Cloudflare 的验证情况更新
}
# Fetch page content, with a retry mechanism
def get_page_content(url, max_retries=100, sleep_time=5, default_timeout=10):
scraper = cloudscraper.create_scraper(
browser={"browser": "chrome", "platform": "windows", "mobile": False}
)
retries = 0
    while retries < max_retries:
        try:
            response = scraper.get(url, headers=HEADERS, cookies=COOKIES, timeout=default_timeout)
            if response.status_code == 200 and "content-area content-area--full-width" in response.text:
                return response.text  # success: return the page content
        except requests.RequestException as e:
            logging.info(f"Warn fetching page {url}: {e}. Retrying {retries + 1}/{max_retries}...")
        retries += 1  # count incomplete pages as failed attempts too, not only exceptions
        time.sleep(sleep_time)  # sleep for the configured time, then retry
    logging.error(f"Failed to fetch page {url} after {max_retries} retries.")
    return None
# Fetch the top scenes and movies
def get_scenes(base_url, output_file=top_scenes_file):
    # Initialize state
current_url = base_url
all_data = []
while current_url:
try:
logging.info(f"Fetching URL: {current_url}")
            # Fetch the page
            content = get_page_content(current_url)
            if content is None:
                logging.error(f"Giving up on {current_url} after repeated failures.")
                break
            # Parse the page
            soup = BeautifulSoup(content, "html.parser")
articles = soup.find_all("article", class_="loop-item loop-item--top loop-item--ca_prod_movies__scen")
if not articles:
logging.warning(f"No articles found on page: {current_url}")
            # Parse each article tag
for article in articles:
try:
                    # Extract href and title
a_tag = article.find("a", class_="loop-item__image")
title = a_tag.get("title", "").strip()
href = a_tag.get("href", "").strip()
if title and href:
all_data.append({
'title': title,
'href': href
})
logging.info(f"Extracted: {title} -> {href}")
else:
logging.warning("Missing title or href in an article.")
except Exception as e:
logging.error(f"Error parsing article: {e}")
            # Look for the next-page link
next_page = soup.find("a", class_="next page-numbers")
if next_page:
current_url = next_page.get("href", "").strip()
else:
current_url = None
logging.info("No more pages to fetch.")
            # Wait a bit to avoid getting banned by the target site
time.sleep(2)
except requests.exceptions.RequestException as e:
logging.error(f"Network error while fetching {current_url}: {e}")
break
except Exception as e:
logging.error(f"Unexpected error: {e}")
break
    # Save the results to a file
csv_headers = ["title", "href"]
with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=csv_headers)
writer.writeheader()
writer.writerows(all_data)
logging.info(f"Data successfully saved to {output_file}.")
# Fetch the top pornstars
def get_pornstars(base_url, output_file=top_pornstars_file):
    # Initialize state
current_url = base_url
all_data = []
while current_url:
try:
logging.info(f"Fetching URL: {current_url}")
            # Fetch the page
            content = get_page_content(current_url)
            if content is None:
                logging.error(f"Giving up on {current_url} after repeated failures.")
                break
            # Parse the page
            soup = BeautifulSoup(content, "html.parser")
articles = soup.find_all("article", class_="loop-item loop-item--top loop-item--ca_prod_pornstars")
if not articles:
logging.warning(f"No articles found on page: {current_url}")
            # Parse each article tag
for article in articles:
try:
                    # Extract href and title
a_tag = article.find("a", class_="loop-item__image")
title = a_tag.get("title", "").strip()
href = a_tag.get("href", "").strip()
if title and href:
all_data.append({
'title':title,
'href': href
})
logging.info(f"Extracted: {title} -> {href}")
else:
logging.warning("Missing title or href in an article.")
except Exception as e:
logging.error(f"Error parsing article: {e}")
            # Look for the next-page link
next_page = soup.find("a", class_="next page-numbers")
if next_page:
current_url = next_page.get("href", "").strip()
else:
current_url = None
logging.info("No more pages to fetch.")
            # Wait a bit to avoid getting banned by the target site
time.sleep(2)
except requests.exceptions.RequestException as e:
logging.error(f"Network error while fetching {current_url}: {e}")
break
except Exception as e:
logging.error(f"Unexpected error: {e}")
break
    # Save the results to a file
csv_headers = ["title", "href"]
with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=csv_headers)
writer.writeheader()
writer.writerows(all_data)
logging.info(f"Data successfully saved to {output_file}.")
def main():
if len(sys.argv) < 2:
print("Usage: python script.py <cmd>")
print("cmd: scenes, pornstars")
sys.exit(1)
cmd = sys.argv[1]
if cmd == "scenes":
        get_scenes(list_url_scenes)  # fetch the top scenes/movies list
elif cmd == "pornstars":
        get_pornstars(list_url_pornstars)  # fetch the top pornstars list
else:
print(f"Unknown command: {cmd}")
if __name__ == '__main__':
main()