diff --git a/javdb/src/fetch.py b/javdb/src/fetch.py index 61bfb6b..0392a04 100644 --- a/javdb/src/fetch.py +++ b/javdb/src/fetch.py @@ -17,6 +17,7 @@ force = False skip_local = False from_actor = False abnormal_only = False +fast_mode = False # 获取演员列表 def fetch_actor_list(): @@ -51,7 +52,7 @@ def fetch_makers_list(): if list_data : # 写入数据库 for row in list_data: - maker_id = db_tools.insert_or_update_makers(row) + maker_id = db_tools.insert_or_update_makers(row, caller='list') if maker_id: logging.debug(f'insert maker to db. maker_id:{maker_id}, name: {row['name']}, href:{row['href']}') else: @@ -74,7 +75,7 @@ def fetch_series_list(): if list_data : # 写入数据库 for row in list_data: - maker_id = db_tools.insert_or_update_series(row) + maker_id = db_tools.insert_or_update_series(row, caller='list') if maker_id: logging.debug(f'insert series to db. maker_id:{maker_id}, name: {row['name']}, href:{row['href']}') else: @@ -89,7 +90,11 @@ def fetch_series_list(): # 更新makers列表中的影片信息 def fetch_movies_by_maker(): - url_list = db_tools.query_maker_hrefs() + if fast_mode: + url_list = db_tools.query_maker_hrefs(from_list=1) + else: + url_list = db_tools.query_maker_hrefs() + if debug: url_list = db_tools.query_maker_hrefs(name='muramura') for url in url_list: @@ -120,7 +125,11 @@ def fetch_movies_by_maker(): # 更新series列表中的影片信息 def fetch_movies_by_series(): - url_list = db_tools.query_series_hrefs() + if fast_mode: + url_list = db_tools.query_series_hrefs(from_list=1) + else: + url_list = db_tools.query_series_hrefs() + if debug: url_list = db_tools.query_series_hrefs(name='10musume') for url in url_list: @@ -308,25 +317,7 @@ function_map = { } # 主函数 -def main(cmd, args_debug, args_force, args_skip_local, args_from_actor, args_abnormal_only): - global debug - debug = args_debug - if debug: - logger = logging.getLogger() - logger.setLevel(logging.DEBUG) - - global force - force = args_force - - global skip_local - skip_local = args_skip_local - - global from_actor - from_actor = args_from_actor - - global abnormal_only - abnormal_only = args_abnormal_only - +def main(cmd): # 开启任务 task_id = db_tools.insert_task_log() if task_id is None: @@ -359,6 +350,29 @@ def main(cmd, args_debug, args_force, args_skip_local, args_from_actor, args_abn # TODO: # 1, +# 设置环境变量 +def set_env(args): + global debug + debug = args.debug + if debug: + logger = logging.getLogger() + logger.setLevel(logging.DEBUG) + + global force + force = args.force + + global skip_local + skip_local = args.skip_local + + global from_actor + from_actor = args.from_actor + + global abnormal_only + abnormal_only = args.abnormal_only + + global fast_mode + fast_mode = args.fast_mode + if __name__ == "__main__": # 命令行参数处理 keys_str = ",".join(function_map.keys()) @@ -370,6 +384,8 @@ if __name__ == "__main__": parser.add_argument('--skip_local', action='store_true', help='skip if cached html (true for skip)') parser.add_argument('--from_actor', action='store_true', help='只遍历来自 actor_list 的 演员或者影片 (在force模式下有效)') parser.add_argument('--abnormal_only', action='store_true', help='只遍历异常URL(404或者需要登陆查看等) 的 演员或影片 (在force模式下有效)') + parser.add_argument('--fast_mode', action='store_true', help='只遍历所有 uncensored 的 makers 和 series ') args = parser.parse_args() - main(args.cmd, args.debug, args.force, args.skip_local, args.from_actor, args.abnormal_only) + set_env(args) + main(args.cmd) diff --git a/javdb/src/sqlite_utils.py b/javdb/src/sqlite_utils.py index 864f798..088b5bc 100644 --- a/javdb/src/sqlite_utils.py +++ b/javdb/src/sqlite_utils.py @@ -9,6 +9,8 @@ DB_PATH = f"{config.global_share_data_dir}/sqlite/shared.db" # 替换为你的 conn = sqlite3.connect(DB_PATH, check_same_thread=False) cursor = conn.cursor() +cached_tags = {} + # """从指定表中通过 href 查找 id""" def get_id_by_href(table: str, href: str) -> int: if href is None: @@ -269,16 +271,31 @@ def query_actors(**filters): # 插入或更新发行商 """ -def insert_or_update_makers(data): +def insert_or_update_makers(data, caller='list'): try: - cursor.execute(""" - INSERT INTO javdb_makers (name, href, updated_at) - VALUES (?, ? , datetime('now', 'localtime')) - ON CONFLICT(href) DO UPDATE SET - name = excluded.name, - updated_at = datetime('now', 'localtime') - """, (data["name"], data["href"])) - conn.commit() + if caller == 'list': + cursor.execute(""" + INSERT INTO javdb_makers (name, href, from_list, updated_at) + VALUES (?, ? , 1, datetime('now', 'localtime')) + ON CONFLICT(href) DO UPDATE SET + name = excluded.name, + from_list = 1, + updated_at = datetime('now', 'localtime') + """, (data["name"], data["href"])) + conn.commit() + elif caller == 'movie': + cursor.execute(""" + INSERT INTO javdb_makers (name, href, from_movie_list, updated_at) + VALUES (?, ? , 1, datetime('now', 'localtime')) + ON CONFLICT(href) DO UPDATE SET + name = excluded.name, + from_movie_list = 1, + updated_at = datetime('now', 'localtime') + """, (data["name"], data["href"])) + conn.commit() + else: + logging.warning(f"unexpected caller: {caller}") + return None # 获取 performer_id cursor.execute("SELECT id FROM javdb_makers WHERE href = ?", (data["href"],)) @@ -333,6 +350,9 @@ def query_maker_hrefs(**filters): if "id" in filters: sql += " AND id = ?" params.append(filters["id"]) + if "from_list" in filters: + sql += " AND from_list = ?" + params.append(filters["from_list"]) if "url" in filters: sql += " AND href = ?" params.append(filters["href"]) @@ -348,16 +368,31 @@ def query_maker_hrefs(**filters): return None # """ 插入或更新制作公司 """ -def insert_or_update_series(data): +def insert_or_update_series(data, caller='list'): try: - cursor.execute(""" - INSERT INTO javdb_series (name, href, updated_at) - VALUES (?, ?, datetime('now', 'localtime')) - ON CONFLICT(href) DO UPDATE SET - name = excluded.name, - updated_at = datetime('now', 'localtime') - """, (data["name"], data["href"])) - conn.commit() + if caller == 'list': + cursor.execute(""" + INSERT INTO javdb_series (name, href, from_list, updated_at) + VALUES (?, ? , 1, datetime('now', 'localtime')) + ON CONFLICT(href) DO UPDATE SET + name = excluded.name, + from_list = 1, + updated_at = datetime('now', 'localtime') + """, (data["name"], data["href"])) + conn.commit() + elif caller == 'movie': + cursor.execute(""" + INSERT INTO javdb_series (name, href, from_movie_list, updated_at) + VALUES (?, ? , 1, datetime('now', 'localtime')) + ON CONFLICT(href) DO UPDATE SET + name = excluded.name, + from_movie_list = 1, + updated_at = datetime('now', 'localtime') + """, (data["name"], data["href"])) + conn.commit() + else: + logging.warning(f"unexpected caller: {caller}") + return None # 获取 performer_id cursor.execute("SELECT id FROM javdb_series WHERE href = ?", (data["href"],)) @@ -412,6 +447,9 @@ def query_series_hrefs(**filters): if "id" in filters: sql += " AND id = ?" params.append(filters["id"]) + if "from_list" in filters: + sql += " AND from_list = ?" + params.append(filters["from_list"]) if "href" in filters: sql += " AND href = ?" params.append(filters["href"]) @@ -430,6 +468,9 @@ def query_series_hrefs(**filters): # 插入或更新类别 """ def insert_or_update_tags(name, href): try: + if href in cached_tags: + return cached_tags[href]['id'] + cursor.execute(""" INSERT INTO javdb_tags (name, href, updated_at) VALUES (?, ? , datetime('now', 'localtime')) @@ -439,10 +480,12 @@ def insert_or_update_tags(name, href): """, (name, href)) conn.commit() - # 获取 performer_id - cursor.execute("SELECT id FROM javdb_tags WHERE href = ?", (href,)) - dist_id = cursor.fetchone()[0] - if dist_id: + cursor.execute("SELECT id, name, href FROM javdb_tags") + for row in cursor.fetchall(): + cached_tags[row[2]] = {'id': row[0], 'name':row[2]} + + if href in cached_tags: + dist_id = cached_tags[href]['id'] logging.debug(f"insert/update tags succ. id: {dist_id}, name: {name}") return dist_id else: @@ -452,6 +495,42 @@ def insert_or_update_tags(name, href): logging.error(f"数据库错误: {e}") return None +# 查询tags +def query_tags(href, name): + global cached_tags + try: + if href not in cached_tags: + cursor.execute("SELECT id, name, href FROM javdb_tags") + for row in cursor.fetchall(): + cached_tags[row[2]] = {'id': row[0], 'name':row[2]} + + if href in cached_tags: + return cached_tags[href]['id'], cached_tags[href]['name'] + except sqlite3.Error as e: + logging.error(f"查询失败: {e}") + return 0, name + +# 插入影片和tags的关联数据 +def insert_movie_tags( movie_id, tag_id, tags=''): + try: + cursor.execute(""" + INSERT INTO javdb_movies_tags (movie_id, tag_id, tags, updated_at) + VALUES (?, ?, ?, datetime('now', 'localtime')) + ON CONFLICT(tag_id, movie_id) DO UPDATE SET tags=excluded.tags, updated_at=datetime('now', 'localtime') + """, + (movie_id, tag_id, tags) + ) + conn.commit() + + #logging.debug(f'insert one performer_movie, performer_id: {performer_id}, movie_id: {movie_id}') + + return movie_id + + except Exception as e: + conn.rollback() + logging.error("Error inserting movie: %s", e) + return None + # """插入或更新电影数据""" def insert_or_update_movie(movie): try: @@ -461,9 +540,9 @@ def insert_or_update_movie(movie): # 如果不存在,插入 if makers_id is None and movie['maker_link']: - makers_id = insert_or_update_makers({'name' : movie.get('maker_name', ''), 'href' : movie.get('maker_link', '')}) + makers_id = insert_or_update_makers({'name' : movie.get('maker_name', ''), 'href' : movie.get('maker_link', '')}, caller='movie') if series_id is None and movie['series_link']: - series_id = insert_or_update_series({'name' : movie.get('series_name', ''), 'href' : movie.get('series_link', '')}) + series_id = insert_or_update_series({'name' : movie.get('series_name', ''), 'href' : movie.get('series_link', '')}, caller='movie') cursor.execute(""" INSERT INTO javdb_movies (href, title, cover_url, serial_number, release_date, duration, @@ -513,7 +592,14 @@ def insert_or_update_movie(movie): tag_href = tag.get('href', '') tag_id = insert_or_update_tags(tag_name, tag_href) if tag_id: - logging.debug(f"insert one tags. tag_id: {tag_id}, name:{tag_name}") + logging.debug(f"insert one tags. tag_id: {tag_id}, name: {tag_name}") + tmp_id = insert_movie_tags(movie_id=movie_id, tag_id=tag_id, tags=tag_name) + if tmp_id: + logging.debug(f"insert one movie_tag. movie_id: {movie_id}, tag_id: {tag_id}, name: {tag_name}") + else: + logging.warning(f"insert one movie_tag error. movie_id: {movie_id}, tag_id: {tag_id}, name: {tag_name}") + else: + logging.warning(f"insert tags error. name:{tag_name}, href: {tag_href}") return movie_id