184 lines
6.0 KiB
Python
184 lines
6.0 KiB
Python
|
|
import json
|
|
import time
|
|
import os
|
|
import argparse
|
|
import textwrap
|
|
import logging
|
|
from datetime import datetime, timedelta
|
|
from functools import partial
|
|
import config
|
|
import scraper
|
|
import utils
|
|
from urllib.parse import urljoin, urlparse
|
|
|
|
# Install the project-wide logging configuration (handlers, format, level).
config.setup_logging()

# Runtime options; overwritten from command-line flags by set_env().
debug = False        # when True, each main loop stops after one iteration
skip_local = False   # skip DB operations for locally cached pages (--skip_local)
scan_mode = 0        # traversal mode, see --scan_mode help text
update_mode = 0      # update traversal mode, see --update help text

# Today's date, e.g. "2024-01-31".
# NOTE(review): not referenced anywhere in this file — presumably used by a
# sibling module or leftover; confirm before removing.
current_date_str = datetime.now().strftime("%Y-%m-%d")

# Output locations under the shared data directory.
target_csv = f"{config.global_share_data_dir}/u3c3.csv"
target_torrent_dir = f"{config.global_share_data_dir}/u3c3_torrents"
|
|
|
|
# Fetch every page of the site listing and persist the parsed rows to CSV.
def fetch_list(start_p=1):
    """Crawl the paginated listing starting at page *start_p*.

    Pages are fetched one per second until the last page reported by the
    site has been processed (or after a single page in debug mode).  All
    parsed rows are accumulated and written to ``target_csv`` at the end.

    Args:
        start_p: 1-based page number to start crawling from (default 1).
    """
    p = start_p
    total_results = []
    while True:
        url = f"https://u001.25img.com/?p={p}"
        logging.info(f"fetching url {url}")
        soup, status_code = scraper.fetch_page(
            url,
            partial(scraper.generic_validator, tag="div",
                    identifier="table-responsive", attr_type="class"))
        if soup:
            list_data, total_pages = scraper.parse_page(soup, url)
            if list_data:
                total_results.extend(list_data)
            else:
                logging.warning(f"fetch_list failed. url: {url} ")

            if not total_pages:
                # Pagination could not be determined; stop rather than
                # re-fetching the same page forever.
                logging.warning(f"fetch_list failed. url: {url} ")
                break
            if p >= total_pages:
                # BUG FIX: the original assigned ``url = None`` here, which
                # never terminated the ``while True`` loop (``url`` is
                # reassigned at the top of every iteration), so the last
                # page was re-fetched indefinitely.  Break out instead.
                break
            p += 1
            time.sleep(1)
        else:
            logging.warning(f'fetch_page error. url: {url}, status_code: {status_code}')
            # BUG FIX: the original looped forever retrying the same page
            # after a fetch failure (outside debug mode); give up instead.
            break

        if debug:
            # Debug mode: limit the crawl to a single iteration.
            break

    # Write the accumulated rows to the CSV file.
    lines = utils.write_to_csv(total_results, target_csv)
    if lines:
        logging.info(f"write to file succ. total lines: {lines}, file: {target_csv}")
    logging.info(f"fetch list finished. total pages: {p}")
|
|
|
|
|
|
# Download every torrent referenced by the CSV into per-letter subfolders.
def down_torrents():
    """Download all .torrent files listed in ``target_csv``.

    Files are bucketed into sub-directories named after the first character
    of the file name; already-downloaded files are skipped.
    """
    # Load previously scraped rows; nothing to do without them.
    rows = utils.read_csv_data(target_csv)
    if not rows:
        return

    # Make sure the root download directory exists.
    os.makedirs(target_torrent_dir, exist_ok=True)

    for row in rows:
        title = row.get('title', '')
        torrent_url = row.get('torrent_url', '')

        # Only accept https URLs that point at an actual .torrent file.
        url_ok = torrent_url.startswith('https') and torrent_url.endswith('.torrent')
        if not url_ok:
            logging.warning(f"跳过非法torrent链接: {torrent_url}")
            continue

        # Derive the local file name from the URL path.
        try:
            filename = os.path.basename(urlparse(torrent_url).path)
            if not filename:
                logging.warning(f"无法从URL解析文件名: {torrent_url}")
                continue
        except Exception as e:
            logging.warning(f"解析URL时出错: {e}")
            continue

        # Bucket into a sub-directory named after the lowercased first char.
        subdir = os.path.join(target_torrent_dir, filename[0].lower())
        os.makedirs(subdir, exist_ok=True)

        # Skip files that were already downloaded in a previous run.
        local_path = os.path.join(subdir, filename)
        if os.path.exists(local_path):
            logging.info(f"文件已存在,跳过下载: {title}, {local_path}")
            continue

        succ = scraper.download_torrent(torrent_url, local_path)
        if succ:
            logging.info(f"download succ. {title}, {local_path}")

        if debug:
            # Debug mode: stop after the first download attempt.
            break
        # Be polite to the server between downloads.
        time.sleep(1)
|
|
|
|
# Map command-line shortcut names to their handler functions.
function_map = {
    "list": fetch_list,
    "down" : down_torrents,
}
|
|
|
|
# Entry point: run the requested functions, or everything when no cmd given.
def main(cmd, args):
    """Dispatch to the handler functions named in *cmd*.

    Args:
        cmd: comma-separated shortcut names (keys of ``function_map``),
             or a falsy value to run every registered function in order.
        args: parsed argparse namespace (unused here; kept so the call
              signature stays backward-compatible).
    """
    if cmd:
        # Run only the requested functions, in the order given.
        # BUG FIX: the original read ``args.cmd`` instead of the ``cmd``
        # parameter it was passed; use the parameter.
        function_names = cmd.split(",")
        for short_name in function_names:
            func = function_map.get(short_name.strip())
            if callable(func):
                func()
            else:
                logging.warning(f" {short_name} is not a valid function shortcut.")
    else:  # no command given: run every registered function
        for name, func in function_map.items():
            if callable(func):
                func()
            else:
                # BUG FIX: the original referenced ``short_name`` here, a
                # name that does not exist in this branch (NameError).
                logging.warning(f" {name} is not a valid function shortcut.")

    logging.info('all process completed!')
|
|
|
|
# TODO:
|
|
# 1,
|
|
|
|
# Apply parsed command-line options to the module-level settings.
def set_env(args):
    """Copy the relevant flags from *args* into the module globals.

    Args:
        args: parsed argparse namespace with ``debug``, ``skip_local``,
              ``scan_mode`` and ``update`` attributes.
    """
    global debug, skip_local, scan_mode, update_mode

    debug = args.debug
    if debug:
        # Debug runs want verbose logging on the root logger.
        logging.getLogger().setLevel(logging.DEBUG)

    skip_local = args.skip_local
    scan_mode = args.scan_mode

    # Only a non-zero --update overrides the module default.
    if args.update:
        update_mode = args.update
|
|
|
|
if __name__ == "__main__":
    # Command-line argument handling.
    keys_str = ",".join(function_map.keys())

    usage_examples = textwrap.dedent('''
        示例用法:
        python3 ./fetch.py # 刷新列表,并下载新增资源
        python3 ./fetch.py --cmd=list # 刷新列表
        python3 ./fetch.py --cmd=down # 并下载新增资源
    ''')

    # NOTE(review): the description says "javhd" but this script targets
    # u3c3 — looks copy-pasted from a sibling script; confirm intent.
    parser = argparse.ArgumentParser(
        description='fetch javhd data.\n\n' + usage_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("--cmd", type=str, help=f"Comma-separated list of function shortcuts: {keys_str}")
    parser.add_argument('--update', type=int, choices=[0, 1, 2, 3, 4], default=0, help='0-只遍历is_full_data=0(默认), 1-只遍历is_full_data=1, 2-遍历is_full_data<=1, 3-只遍历is_full_data>1(异常数据), 4-遍历所有')
    parser.add_argument('--scan_mode', type=int, choices=[0, 1, 2], default=1, help='1-只遍历所有 uncensored 的 makers/series/actors/movies(默认), 0-与前者相反, 2-全量')
    parser.add_argument('--skip_local', action='store_true', help='如果本地缓存了页面,则跳过数据库操作')
    parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
    args = parser.parse_args()

    # Propagate flags into module globals, then dispatch.
    set_env(args)
    main(args.cmd, args)