modify scripts

2025-04-25 16:23:01 +08:00
parent 4c80e72a98
commit 40b5d3f99a
3 changed files with 198 additions and 11 deletions
--- a/javdb/src/fetch.py
+++ b/javdb/src/fetch.py
@ -161,6 +161,42 @@ def fetch_movies_by_series():
            if debug:
                return True

+# Update the movie index from every publisher's listing pages
+def fetch_movies_by_publishers():
+    """Crawl publisher listing pages and record every movie they link to.
+
+    Publisher rows come from ``db_tools.query_publishers_hrefs``. For each
+    row the listing is followed page by page using the ``next_url`` returned
+    by ``scraper.parse_publisher_detail``, and every parsed movie is stored
+    through ``db_tools.insert_movie_index`` with ``from_movie_publishers=1``
+    and the publisher's row id as ``pub_id``.
+
+    Reads the module-level ``debug`` and ``fast_mode`` flags.
+
+    Returns:
+        ``True`` when ``debug`` is set, right after the first fetch attempt
+        has been handled; otherwise ``None`` once all publishers are done.
+    """
+    # Consecutive failed fetches (other than 404) tolerated for one page
+    # before giving up on the publisher; without a cap a persistently
+    # failing URL would be retried forever.
+    max_fetch_failures = 5
+
+    # Debug always works on a single publisher, so query only that one
+    # instead of fetching the whole list first and throwing it away.
+    if debug:
+        url_list = db_tools.query_publishers_hrefs(limit=1)
+    elif fast_mode:
+        url_list = db_tools.query_publishers_hrefs(from_list=1)
+    else:
+        url_list = db_tools.query_publishers_hrefs()
+
+    for row in url_list:
+        url = row['href']
+        row_id = row['id']
+        # Strip the query string (the "downloadable" flag, if present)
+        next_url = utils.remove_url_query(url)
+        fetch_failures = 0
+        while next_url:
+            # Keep the URL being processed: next_url is overwritten by the
+            # parser and must not leak into log messages about this page.
+            current_url = next_url
+            logging.info(f"Fetching data for publisher url {current_url} ...")
+            soup, status_code = scraper.fetch_page(current_url, partial(scraper.generic_validator, tag="div", identifier="modal-card", attr_type="class"))
+            if soup:
+                fetch_failures = 0
+                list_data, next_url = scraper.parse_publisher_detail(soup, current_url)
+                if list_data:
+                    for movie in list_data:
+                        tmp_id = db_tools.insert_movie_index(title=movie['title'], href=movie['href'], from_movie_publishers=1, pub_id=row_id)
+                        # Inner subscripts use double quotes so the f-strings
+                        # also parse on interpreters older than Python 3.12.
+                        if tmp_id:
+                            logging.debug(f'insert one movie index to db. movie_id: {tmp_id}, title: {movie["title"]}, href: {movie["href"]}')
+                        else:
+                            logging.warning(f'insert movie index failed. title: {movie["title"]}, href: {movie["href"]}')
+                else:
+                    logging.warning(f'parse_page_movie error. url: {current_url}')
+            elif status_code == 404:
+                logging.warning(f'fetch page error. httpcode: {status_code}, url: {current_url}')
+                break
+            else:
+                # Any other failure: retry the same page, but report it and
+                # stop after too many consecutive failures.
+                fetch_failures += 1
+                logging.warning(f'fetch page failed. httpcode: {status_code}, url: {current_url}, attempt: {fetch_failures}/{max_fetch_failures}')
+                if fetch_failures >= max_fetch_failures:
+                    logging.warning(f'giving up on publisher page after repeated failures. url: {current_url}')
+                    break
+
+            # In debug mode stop after the first page attempt
+            if debug:
+                return True
+

 # 更新演员信息
 def fetch_performers_detail():
@ -316,6 +352,7 @@ function_map = {
    "series_list": fetch_series_list,
    "makers": fetch_movies_by_maker,
    "series" : fetch_movies_by_series,
+    "pub" : fetch_movies_by_publishers,
    "movies" : fetch_movies_detail,
    "actors" : fetch_performers_detail,
 }