From 4c80e72a98ca4ed7ac2c41fa89d3e71a20330927 Mon Sep 17 00:00:00 2001 From: oscarz Date: Fri, 25 Apr 2025 10:30:25 +0800 Subject: [PATCH] modify scripts --- javdb/src/fetch.py | 18 +++++++++++------- javdb/src/sqlite_utils.py | 28 +++++++++++++++++----------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/javdb/src/fetch.py b/javdb/src/fetch.py index 0392a04..c8262d1 100644 --- a/javdb/src/fetch.py +++ b/javdb/src/fetch.py @@ -97,7 +97,9 @@ def fetch_movies_by_maker(): if debug: url_list = db_tools.query_maker_hrefs(name='muramura') - for url in url_list: + for row in url_list: + url = row['href'] + row_id = row['id'] # 去掉可下载的标志(如果有) next_url = utils.remove_url_query(url) while next_url: @@ -107,7 +109,7 @@ def fetch_movies_by_maker(): list_data, next_url = scraper.parse_maker_detail(soup, next_url) if list_data: for movie in list_data: - tmp_id = db_tools.insert_movie_index(title=movie['title'], href=movie['href'], from_movie_makers=1) + tmp_id = db_tools.insert_movie_index(title=movie['title'], href=movie['href'], from_movie_makers=1, maker_id=row_id) if tmp_id: logging.debug(f'insert one movie index to db. movie_id: {tmp_id}, title: {movie['title']}, href: {movie['href']}') else: @@ -132,7 +134,9 @@ def fetch_movies_by_series(): if debug: url_list = db_tools.query_series_hrefs(name='10musume') - for url in url_list: + for row in url_list: + url = row['href'] + row_id = row['id'] # 去掉可下载的标志(如果有) next_url = utils.remove_url_query(url) while next_url: @@ -142,7 +146,7 @@ def fetch_movies_by_series(): list_data, next_url = scraper.parse_series_detail(soup, next_url) if list_data: for movie in list_data: - tmp_id = db_tools.insert_movie_index(title=movie['title'], href=movie['href'], from_movie_series=1) + tmp_id = db_tools.insert_movie_index(title=movie['title'], href=movie['href'], from_movie_series=1, series_id=row_id) if tmp_id: logging.debug(f'insert one movie index to db. movie_id: {tmp_id}, title: {movie['title']}, href: {movie['href']}') else: @@ -317,14 +321,14 @@ function_map = { } # 主函数 -def main(cmd): +def main(cmd, args): # 开启任务 task_id = db_tools.insert_task_log() if task_id is None: logging.warning(f'insert task log error.') return None - logging.info(f'running task. id: {task_id}, debug: {debug}, force: {force}, skip_local: {skip_local}, cmd: {cmd}') + logging.info(f"running task. id: {task_id}, args: {args}") # 执行指定的函数 if cmd: @@ -388,4 +392,4 @@ if __name__ == "__main__": args = parser.parse_args() set_env(args) - main(args.cmd) + main(args.cmd, args) diff --git a/javdb/src/sqlite_utils.py b/javdb/src/sqlite_utils.py index 088b5bc..06bed7d 100644 --- a/javdb/src/sqlite_utils.py +++ b/javdb/src/sqlite_utils.py @@ -64,19 +64,21 @@ def insert_actor_index(name, href, from_actor_list=None, from_movie_list=None): logging.error(f"未知错误: {e}") return None -def insert_movie_index(title, href, from_actor_list=None, from_movie_makers=None, from_movie_series=None): +def insert_movie_index(title, href, from_actor_list=None, from_movie_makers=None, from_movie_series=None, maker_id=None, series_id=None): try: # **先检查数据库中是否已有该电影** - cursor.execute("SELECT id, from_actor_list, from_movie_makers, from_movie_series FROM javdb_movies WHERE href = ?", (href,)) + cursor.execute("SELECT id, from_actor_list, from_movie_makers, from_movie_series, maker_id, series_id FROM javdb_movies WHERE href = ?", (href,)) existing_movie = cursor.fetchone() if existing_movie: # **如果电影已存在** - movie_id, existing_actor, existing_maker, existing_series = existing_movie + movie_id, existing_actor, existing_maker, existing_series, existing_maker_id, existing_series_id = existing_movie # **如果没有传入值,就用原来的值** from_actor_list = from_actor_list if from_actor_list is not None else existing_actor from_movie_makers = from_movie_makers if from_movie_makers is not None else existing_maker from_movie_series = from_movie_series if from_movie_series is not None else existing_series + maker_id = maker_id if maker_id is not None else existing_maker_id + series_id = series_id if series_id is not None else existing_series_id cursor.execute(""" UPDATE javdb_movies @@ -84,14 +86,16 @@ def insert_movie_index(title, href, from_actor_list=None, from_movie_makers=None from_actor_list = ?, from_movie_makers = ?, from_movie_series = ?, + maker_id = ?, + series_id = ?, updated_at = datetime('now', 'localtime') WHERE href = ? - """, (title, from_actor_list, from_movie_makers, from_movie_series, href)) + """, (title, from_actor_list, from_movie_makers, from_movie_series, maker_id, series_id, href)) else: # **如果电影不存在,插入** cursor.execute(""" - INSERT INTO javdb_movies (title, href, from_actor_list, from_movie_makers, from_movie_series) - VALUES (?, ?, COALESCE(?, 0), COALESCE(?, 0), COALESCE(?, 0)) - """, (title, href, from_actor_list, from_movie_makers, from_movie_series)) + INSERT INTO javdb_movies (title, href, from_actor_list, from_movie_makers, from_movie_series, maker_id, series_id) + VALUES (?, ?, COALESCE(?, 0), COALESCE(?, 0), COALESCE(?, 0), COALESCE(?, 0), COALESCE(?, 0)) + """, (title, href, from_actor_list, from_movie_makers, from_movie_series, maker_id, series_id)) conn.commit() @@ -344,7 +348,7 @@ def query_maker(identifier): # 按条件查询 href 列表 def query_maker_hrefs(**filters): try: - sql = "SELECT href FROM javdb_makers WHERE 1=1" + sql = "SELECT href, id FROM javdb_makers WHERE 1=1" params = [] if "id" in filters: @@ -361,7 +365,8 @@ def query_maker_hrefs(**filters): params.append(f"%{filters['name']}%") cursor.execute(sql, params) - return [row[0] for row in cursor.fetchall()] # 链接使用小写 + #return [row[0] for row in cursor.fetchall()] # 链接使用小写 + return [{'href': row[0], 'id': row[1]} for row in cursor.fetchall()] except sqlite3.Error as e: logging.error(f"查询 href 失败: {e}") @@ -441,7 +446,7 @@ def query_series(identifier): # 按条件查询 href 列表 def query_series_hrefs(**filters): try: - sql = "SELECT href FROM javdb_series WHERE 1=1" + sql = "SELECT href, id FROM javdb_series WHERE 1=1" params = [] if "id" in filters: @@ -458,7 +463,8 @@ def query_series_hrefs(**filters): params.append(f"%{filters['name']}%") cursor.execute(sql, params) - return [row[0] for row in cursor.fetchall()] # 链接使用小写 + #return [row[0] for row in cursor.fetchall()] # 链接使用小写 + return [{'href': row[0], 'id': row[1]} for row in cursor.fetchall()] except sqlite3.Error as e: logging.error(f"查询 href 失败: {e}")