modify scripts
This commit is contained in:
@ -17,6 +17,7 @@ force = False
|
|||||||
skip_local = False
|
skip_local = False
|
||||||
from_actor = False
|
from_actor = False
|
||||||
abnormal_only = False
|
abnormal_only = False
|
||||||
|
fast_mode = False
|
||||||
|
|
||||||
# 获取演员列表
|
# 获取演员列表
|
||||||
def fetch_actor_list():
|
def fetch_actor_list():
|
||||||
@ -51,7 +52,7 @@ def fetch_makers_list():
|
|||||||
if list_data :
|
if list_data :
|
||||||
# 写入数据库
|
# 写入数据库
|
||||||
for row in list_data:
|
for row in list_data:
|
||||||
maker_id = db_tools.insert_or_update_makers(row)
|
maker_id = db_tools.insert_or_update_makers(row, caller='list')
|
||||||
if maker_id:
|
if maker_id:
|
||||||
logging.debug(f'insert maker to db. maker_id:{maker_id}, name: {row['name']}, href:{row['href']}')
|
logging.debug(f'insert maker to db. maker_id:{maker_id}, name: {row['name']}, href:{row['href']}')
|
||||||
else:
|
else:
|
||||||
@ -74,7 +75,7 @@ def fetch_series_list():
|
|||||||
if list_data :
|
if list_data :
|
||||||
# 写入数据库
|
# 写入数据库
|
||||||
for row in list_data:
|
for row in list_data:
|
||||||
maker_id = db_tools.insert_or_update_series(row)
|
maker_id = db_tools.insert_or_update_series(row, caller='list')
|
||||||
if maker_id:
|
if maker_id:
|
||||||
logging.debug(f'insert series to db. maker_id:{maker_id}, name: {row['name']}, href:{row['href']}')
|
logging.debug(f'insert series to db. maker_id:{maker_id}, name: {row['name']}, href:{row['href']}')
|
||||||
else:
|
else:
|
||||||
@ -89,7 +90,11 @@ def fetch_series_list():
|
|||||||
|
|
||||||
# 更新makers列表中的影片信息
|
# 更新makers列表中的影片信息
|
||||||
def fetch_movies_by_maker():
|
def fetch_movies_by_maker():
|
||||||
url_list = db_tools.query_maker_hrefs()
|
if fast_mode:
|
||||||
|
url_list = db_tools.query_maker_hrefs(from_list=1)
|
||||||
|
else:
|
||||||
|
url_list = db_tools.query_maker_hrefs()
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
url_list = db_tools.query_maker_hrefs(name='muramura')
|
url_list = db_tools.query_maker_hrefs(name='muramura')
|
||||||
for url in url_list:
|
for url in url_list:
|
||||||
@ -120,7 +125,11 @@ def fetch_movies_by_maker():
|
|||||||
|
|
||||||
# 更新series列表中的影片信息
|
# 更新series列表中的影片信息
|
||||||
def fetch_movies_by_series():
|
def fetch_movies_by_series():
|
||||||
url_list = db_tools.query_series_hrefs()
|
if fast_mode:
|
||||||
|
url_list = db_tools.query_series_hrefs(from_list=1)
|
||||||
|
else:
|
||||||
|
url_list = db_tools.query_series_hrefs()
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
url_list = db_tools.query_series_hrefs(name='10musume')
|
url_list = db_tools.query_series_hrefs(name='10musume')
|
||||||
for url in url_list:
|
for url in url_list:
|
||||||
@ -308,25 +317,7 @@ function_map = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# 主函数
|
# 主函数
|
||||||
def main(cmd, args_debug, args_force, args_skip_local, args_from_actor, args_abnormal_only):
|
def main(cmd):
|
||||||
global debug
|
|
||||||
debug = args_debug
|
|
||||||
if debug:
|
|
||||||
logger = logging.getLogger()
|
|
||||||
logger.setLevel(logging.DEBUG)
|
|
||||||
|
|
||||||
global force
|
|
||||||
force = args_force
|
|
||||||
|
|
||||||
global skip_local
|
|
||||||
skip_local = args_skip_local
|
|
||||||
|
|
||||||
global from_actor
|
|
||||||
from_actor = args_from_actor
|
|
||||||
|
|
||||||
global abnormal_only
|
|
||||||
abnormal_only = args_abnormal_only
|
|
||||||
|
|
||||||
# 开启任务
|
# 开启任务
|
||||||
task_id = db_tools.insert_task_log()
|
task_id = db_tools.insert_task_log()
|
||||||
if task_id is None:
|
if task_id is None:
|
||||||
@ -359,6 +350,29 @@ def main(cmd, args_debug, args_force, args_skip_local, args_from_actor, args_abn
|
|||||||
# TODO:
|
# TODO:
|
||||||
# 1,
|
# 1,
|
||||||
|
|
||||||
|
# 设置环境变量
|
||||||
|
def set_env(args):
    """Copy parsed command-line flags into this module's global switches.

    Reads ``debug``, ``force``, ``skip_local``, ``from_actor``,
    ``abnormal_only`` and ``fast_mode`` from ``args`` (in that order) and
    stores each one in the module-level variable of the same name.

    Args:
        args: Object exposing the six flags above as attributes, e.g. the
            namespace returned by ``parser.parse_args()``.
    """
    global debug, force, skip_local, from_actor, abnormal_only, fast_mode

    debug = args.debug
    if debug:
        # Debug runs raise the root logger's verbosity to DEBUG.
        logging.getLogger().setLevel(logging.DEBUG)

    force = args.force
    skip_local = args.skip_local
    from_actor = args.from_actor
    abnormal_only = args.abnormal_only
    fast_mode = args.fast_mode
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# 命令行参数处理
|
# 命令行参数处理
|
||||||
keys_str = ",".join(function_map.keys())
|
keys_str = ",".join(function_map.keys())
|
||||||
@ -370,6 +384,8 @@ if __name__ == "__main__":
|
|||||||
parser.add_argument('--skip_local', action='store_true', help='skip if cached html (true for skip)')
|
parser.add_argument('--skip_local', action='store_true', help='skip if cached html (true for skip)')
|
||||||
parser.add_argument('--from_actor', action='store_true', help='只遍历来自 actor_list 的 演员或者影片 (在force模式下有效)')
|
parser.add_argument('--from_actor', action='store_true', help='只遍历来自 actor_list 的 演员或者影片 (在force模式下有效)')
|
||||||
parser.add_argument('--abnormal_only', action='store_true', help='只遍历异常URL(404或者需要登陆查看等) 的 演员或影片 (在force模式下有效)')
|
parser.add_argument('--abnormal_only', action='store_true', help='只遍历异常URL(404或者需要登陆查看等) 的 演员或影片 (在force模式下有效)')
|
||||||
|
parser.add_argument('--fast_mode', action='store_true', help='只遍历所有 uncensored 的 makers 和 series ')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
main(args.cmd, args.debug, args.force, args.skip_local, args.from_actor, args.abnormal_only)
|
set_env(args)
|
||||||
|
main(args.cmd)
|
||||||
|
|||||||
@ -9,6 +9,8 @@ DB_PATH = f"{config.global_share_data_dir}/sqlite/shared.db" # 替换为你的
|
|||||||
conn = sqlite3.connect(DB_PATH, check_same_thread=False)
|
conn = sqlite3.connect(DB_PATH, check_same_thread=False)
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
cached_tags = {}
|
||||||
|
|
||||||
# """从指定表中通过 href 查找 id"""
|
# """从指定表中通过 href 查找 id"""
|
||||||
def get_id_by_href(table: str, href: str) -> int:
|
def get_id_by_href(table: str, href: str) -> int:
|
||||||
if href is None:
|
if href is None:
|
||||||
@ -269,16 +271,31 @@ def query_actors(**filters):
|
|||||||
|
|
||||||
|
|
||||||
# 插入或更新发行商 """
|
# 插入或更新发行商 """
|
||||||
def insert_or_update_makers(data):
|
def insert_or_update_makers(data, caller='list'):
|
||||||
try:
|
try:
|
||||||
cursor.execute("""
|
if caller == 'list':
|
||||||
INSERT INTO javdb_makers (name, href, updated_at)
|
cursor.execute("""
|
||||||
VALUES (?, ? , datetime('now', 'localtime'))
|
INSERT INTO javdb_makers (name, href, from_list, updated_at)
|
||||||
ON CONFLICT(href) DO UPDATE SET
|
VALUES (?, ? , 1, datetime('now', 'localtime'))
|
||||||
name = excluded.name,
|
ON CONFLICT(href) DO UPDATE SET
|
||||||
updated_at = datetime('now', 'localtime')
|
name = excluded.name,
|
||||||
""", (data["name"], data["href"]))
|
from_list = 1,
|
||||||
conn.commit()
|
updated_at = datetime('now', 'localtime')
|
||||||
|
""", (data["name"], data["href"]))
|
||||||
|
conn.commit()
|
||||||
|
elif caller == 'movie':
|
||||||
|
cursor.execute("""
|
||||||
|
INSERT INTO javdb_makers (name, href, from_movie_list, updated_at)
|
||||||
|
VALUES (?, ? , 1, datetime('now', 'localtime'))
|
||||||
|
ON CONFLICT(href) DO UPDATE SET
|
||||||
|
name = excluded.name,
|
||||||
|
from_movie_list = 1,
|
||||||
|
updated_at = datetime('now', 'localtime')
|
||||||
|
""", (data["name"], data["href"]))
|
||||||
|
conn.commit()
|
||||||
|
else:
|
||||||
|
logging.warning(f"unexpected caller: {caller}")
|
||||||
|
return None
|
||||||
|
|
||||||
# 获取 performer_id
|
# 获取 performer_id
|
||||||
cursor.execute("SELECT id FROM javdb_makers WHERE href = ?", (data["href"],))
|
cursor.execute("SELECT id FROM javdb_makers WHERE href = ?", (data["href"],))
|
||||||
@ -333,6 +350,9 @@ def query_maker_hrefs(**filters):
|
|||||||
if "id" in filters:
|
if "id" in filters:
|
||||||
sql += " AND id = ?"
|
sql += " AND id = ?"
|
||||||
params.append(filters["id"])
|
params.append(filters["id"])
|
||||||
|
if "from_list" in filters:
|
||||||
|
sql += " AND from_list = ?"
|
||||||
|
params.append(filters["from_list"])
|
||||||
if "url" in filters:
|
if "url" in filters:
|
||||||
sql += " AND href = ?"
|
sql += " AND href = ?"
|
||||||
params.append(filters["href"])
|
params.append(filters["href"])
|
||||||
@ -348,16 +368,31 @@ def query_maker_hrefs(**filters):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
# """ 插入或更新制作公司 """
|
# """ 插入或更新制作公司 """
|
||||||
def insert_or_update_series(data):
|
def insert_or_update_series(data, caller='list'):
|
||||||
try:
|
try:
|
||||||
cursor.execute("""
|
if caller == 'list':
|
||||||
INSERT INTO javdb_series (name, href, updated_at)
|
cursor.execute("""
|
||||||
VALUES (?, ?, datetime('now', 'localtime'))
|
INSERT INTO javdb_series (name, href, from_list, updated_at)
|
||||||
ON CONFLICT(href) DO UPDATE SET
|
VALUES (?, ? , 1, datetime('now', 'localtime'))
|
||||||
name = excluded.name,
|
ON CONFLICT(href) DO UPDATE SET
|
||||||
updated_at = datetime('now', 'localtime')
|
name = excluded.name,
|
||||||
""", (data["name"], data["href"]))
|
from_list = 1,
|
||||||
conn.commit()
|
updated_at = datetime('now', 'localtime')
|
||||||
|
""", (data["name"], data["href"]))
|
||||||
|
conn.commit()
|
||||||
|
elif caller == 'movie':
|
||||||
|
cursor.execute("""
|
||||||
|
INSERT INTO javdb_series (name, href, from_movie_list, updated_at)
|
||||||
|
VALUES (?, ? , 1, datetime('now', 'localtime'))
|
||||||
|
ON CONFLICT(href) DO UPDATE SET
|
||||||
|
name = excluded.name,
|
||||||
|
from_movie_list = 1,
|
||||||
|
updated_at = datetime('now', 'localtime')
|
||||||
|
""", (data["name"], data["href"]))
|
||||||
|
conn.commit()
|
||||||
|
else:
|
||||||
|
logging.warning(f"unexpected caller: {caller}")
|
||||||
|
return None
|
||||||
|
|
||||||
# 获取 performer_id
|
# 获取 performer_id
|
||||||
cursor.execute("SELECT id FROM javdb_series WHERE href = ?", (data["href"],))
|
cursor.execute("SELECT id FROM javdb_series WHERE href = ?", (data["href"],))
|
||||||
@ -412,6 +447,9 @@ def query_series_hrefs(**filters):
|
|||||||
if "id" in filters:
|
if "id" in filters:
|
||||||
sql += " AND id = ?"
|
sql += " AND id = ?"
|
||||||
params.append(filters["id"])
|
params.append(filters["id"])
|
||||||
|
if "from_list" in filters:
|
||||||
|
sql += " AND from_list = ?"
|
||||||
|
params.append(filters["from_list"])
|
||||||
if "href" in filters:
|
if "href" in filters:
|
||||||
sql += " AND href = ?"
|
sql += " AND href = ?"
|
||||||
params.append(filters["href"])
|
params.append(filters["href"])
|
||||||
@ -430,6 +468,9 @@ def query_series_hrefs(**filters):
|
|||||||
# 插入或更新类别 """
|
# 插入或更新类别 """
|
||||||
def insert_or_update_tags(name, href):
|
def insert_or_update_tags(name, href):
|
||||||
try:
|
try:
|
||||||
|
if href in cached_tags:
|
||||||
|
return cached_tags[href]['id']
|
||||||
|
|
||||||
cursor.execute("""
|
cursor.execute("""
|
||||||
INSERT INTO javdb_tags (name, href, updated_at)
|
INSERT INTO javdb_tags (name, href, updated_at)
|
||||||
VALUES (?, ? , datetime('now', 'localtime'))
|
VALUES (?, ? , datetime('now', 'localtime'))
|
||||||
@ -439,10 +480,12 @@ def insert_or_update_tags(name, href):
|
|||||||
""", (name, href))
|
""", (name, href))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
# 获取 performer_id
|
cursor.execute("SELECT id, name, href FROM javdb_tags")
|
||||||
cursor.execute("SELECT id FROM javdb_tags WHERE href = ?", (href,))
|
for row in cursor.fetchall():
|
||||||
dist_id = cursor.fetchone()[0]
|
cached_tags[row[2]] = {'id': row[0], 'name':row[2]}
|
||||||
if dist_id:
|
|
||||||
|
if href in cached_tags:
|
||||||
|
dist_id = cached_tags[href]['id']
|
||||||
logging.debug(f"insert/update tags succ. id: {dist_id}, name: {name}")
|
logging.debug(f"insert/update tags succ. id: {dist_id}, name: {name}")
|
||||||
return dist_id
|
return dist_id
|
||||||
else:
|
else:
|
||||||
@ -452,6 +495,42 @@ def insert_or_update_tags(name, href):
|
|||||||
logging.error(f"数据库错误: {e}")
|
logging.error(f"数据库错误: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# 查询tags
|
||||||
|
def query_tags(href, name):
    """Look up a tag by href, using the module-level ``cached_tags`` dict.

    On a cache miss the whole ``javdb_tags`` table is re-read through the
    module-level ``cursor`` and every row is stored in ``cached_tags``
    keyed by href.

    Args:
        href: Tag href used as the cache / lookup key.
        name: Fallback name returned when the tag cannot be resolved.

    Returns:
        ``(tag_id, tag_name)`` from the cache when ``href`` is known,
        otherwise ``(0, name)`` -- both when the tag does not exist and
        when the query fails with ``sqlite3.Error``.
    """
    # `cached_tags` is only mutated, never rebound, so no `global` is needed.
    try:
        if href not in cached_tags:
            cursor.execute("SELECT id, name, href FROM javdb_tags")
            for tag_id, tag_name, tag_href in cursor.fetchall():
                # Store the real name column; the previous code stored the
                # href in the 'name' slot.
                cached_tags[tag_href] = {'id': tag_id, 'name': tag_name}

        entry = cached_tags.get(href)
        if entry is not None:
            return entry['id'], entry['name']
    except sqlite3.Error as e:
        logging.error(f"查询失败: {e}")

    # Unknown tag or database error: always hand back a 2-tuple so callers
    # can unpack the result unconditionally.
    return 0, name
|
||||||
|
|
||||||
|
# 插入影片和tags的关联数据
|
||||||
|
def insert_movie_tags(movie_id, tag_id, tags=''):
    """Insert or refresh one movie/tag link row in ``javdb_movies_tags``.

    Uses the module-level ``cursor`` / ``conn``. If a row for the same
    ``(tag_id, movie_id)`` pair already exists, its ``tags`` text and
    ``updated_at`` timestamp are overwritten.

    Args:
        movie_id: Id of the movie row.
        tag_id: Id of the tag row.
        tags: Tag text stored alongside the link (defaults to ``''``).

    Returns:
        ``movie_id`` on success; ``None`` if anything fails, in which case
        the transaction is rolled back and the error is logged.
    """
    upsert_sql = """
        INSERT INTO javdb_movies_tags (movie_id, tag_id, tags, updated_at)
        VALUES (?, ?, ?, datetime('now', 'localtime'))
        ON CONFLICT(tag_id, movie_id) DO UPDATE SET tags=excluded.tags, updated_at=datetime('now', 'localtime')
    """
    try:
        cursor.execute(upsert_sql, (movie_id, tag_id, tags))
        conn.commit()
    except Exception as e:
        # Broad catch kept on purpose: callers rely on a None return rather
        # than an exception to detect a failed link insert.
        conn.rollback()
        logging.error(
            "Error inserting movie tag (movie_id=%s, tag_id=%s): %s",
            movie_id, tag_id, e,
        )
        return None

    return movie_id
|
||||||
|
|
||||||
# """插入或更新电影数据"""
|
# """插入或更新电影数据"""
|
||||||
def insert_or_update_movie(movie):
|
def insert_or_update_movie(movie):
|
||||||
try:
|
try:
|
||||||
@ -461,9 +540,9 @@ def insert_or_update_movie(movie):
|
|||||||
|
|
||||||
# 如果不存在,插入
|
# 如果不存在,插入
|
||||||
if makers_id is None and movie['maker_link']:
|
if makers_id is None and movie['maker_link']:
|
||||||
makers_id = insert_or_update_makers({'name' : movie.get('maker_name', ''), 'href' : movie.get('maker_link', '')})
|
makers_id = insert_or_update_makers({'name' : movie.get('maker_name', ''), 'href' : movie.get('maker_link', '')}, caller='movie')
|
||||||
if series_id is None and movie['series_link']:
|
if series_id is None and movie['series_link']:
|
||||||
series_id = insert_or_update_series({'name' : movie.get('series_name', ''), 'href' : movie.get('series_link', '')})
|
series_id = insert_or_update_series({'name' : movie.get('series_name', ''), 'href' : movie.get('series_link', '')}, caller='movie')
|
||||||
|
|
||||||
cursor.execute("""
|
cursor.execute("""
|
||||||
INSERT INTO javdb_movies (href, title, cover_url, serial_number, release_date, duration,
|
INSERT INTO javdb_movies (href, title, cover_url, serial_number, release_date, duration,
|
||||||
@ -513,7 +592,14 @@ def insert_or_update_movie(movie):
|
|||||||
tag_href = tag.get('href', '')
|
tag_href = tag.get('href', '')
|
||||||
tag_id = insert_or_update_tags(tag_name, tag_href)
|
tag_id = insert_or_update_tags(tag_name, tag_href)
|
||||||
if tag_id:
|
if tag_id:
|
||||||
logging.debug(f"insert one tags. tag_id: {tag_id}, name:{tag_name}")
|
logging.debug(f"insert one tags. tag_id: {tag_id}, name: {tag_name}")
|
||||||
|
tmp_id = insert_movie_tags(movie_id=movie_id, tag_id=tag_id, tags=tag_name)
|
||||||
|
if tmp_id:
|
||||||
|
logging.debug(f"insert one movie_tag. movie_id: {movie_id}, tag_id: {tag_id}, name: {tag_name}")
|
||||||
|
else:
|
||||||
|
logging.warning(f"insert one movie_tag error. movie_id: {movie_id}, tag_id: {tag_id}, name: {tag_name}")
|
||||||
|
else:
|
||||||
|
logging.warning(f"insert tags error. name:{tag_name}, href: {tag_href}")
|
||||||
|
|
||||||
return movie_id
|
return movie_id
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user