From a6f99a2551397aeaa704331bdde0a1b9a0625c98 Mon Sep 17 00:00:00 2001
From: oscarz
Date: Thu, 24 Apr 2025 14:48:02 +0800
Subject: [PATCH] modify scripts

---
 iafd/src/fetch.py         | 21 ++++++++++++++++++---
 iafd/src/sqlite_utils.py  |  8 ++++----
 javdb/src/fetch.py        | 17 ++++++++++++++---
 javdb/src/sqlite_utils.py |  4 ++--
 4 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/iafd/src/fetch.py b/iafd/src/fetch.py
index 88943af..001bab6 100644
--- a/iafd/src/fetch.py
+++ b/iafd/src/fetch.py
@@ -14,6 +14,7 @@ config.setup_logging()
 
 debug = False
 force = False
+skip_local = True
 
 # fetch the performer list by zodiac sign, no pagination
 def fetch_performers_by_astro():
@@ -242,8 +243,13 @@ def fetch_performers_detail_once(perfomers_list):
     for performer in perfomers_list:
         url = performer['href']
         person = performer['name']
+        curr_id = performer['id']
         logging.debug(f"Fetching data for performer ({person}), url {url} ...")
         soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="headshot", attr_type="id"))
+        # page was served from the local cache; skip it
+        if skip_local and status_code == 99:
+            last_performer_id = curr_id
+            continue
         if soup:
             data = scraper.parse_page_performer(soup, url)
             if data:
@@ -326,8 +332,13 @@ def fetch_movies_detail():
     for movie in movies_list:
         url = movie['href']
         title = movie['title']
-        logging.debug(f"Fetching data for movie ({title}), url {url} ...")
+        curr_id = movie['id']
+        logging.debug(f"Fetching data for movie: {curr_id}: ({title}), url {url} ...")
         soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="col-xs-12 col-sm-3", attr_type="class"))
+        # page was served from the local cache; skip it
+        if skip_local and status_code == 99:
+            last_movie_id = curr_id
+            continue
         if soup:
             movie_data = scraper.parse_page_movie(soup, url, title)
             if movie_data :
@@ -378,7 +389,7 @@ function_map = {
 }
 
 # main function
-def main(cmd, args_debug, args_force):
+def main(cmd, args_debug, args_force, args_skip_local):
     global debug
     debug = args_debug
     if debug:
@@ -388,6 +399,9 @@ def main(cmd, args_debug, args_force):
     global force
     force = args_force
 
+    global skip_local
+    skip_local = args_skip_local
+
     # start the task
     task_id = db_tools.insert_task_log()
     if task_id is None:
@@ -431,6 +445,7 @@ if __name__ == "__main__":
     parser.add_argument("--cmd", type=str, help=f"Comma-separated list of function shortcuts: {keys_str}")
     parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
     parser.add_argument('--force', action='store_true', help='force update (true for rewrite all)')
+    parser.add_argument('--skip_local', action='store_true', help='skip URLs whose HTML is already cached locally')
     args = parser.parse_args()
 
-    main(args.cmd, args.debug, args.force)
+    main(args.cmd, args.debug, args.force, args.skip_local)
diff --git a/iafd/src/sqlite_utils.py b/iafd/src/sqlite_utils.py
index b47e380..7a3ccbc 100644
--- a/iafd/src/sqlite_utils.py
+++ b/iafd/src/sqlite_utils.py
@@ -329,7 +329,7 @@ def query_performer(identifier):
 # query the href list by filter conditions
 def query_performer_hrefs(**filters):
     try:
-        sql = "SELECT href, name FROM iafd_performers WHERE 1=1"
+        sql = "SELECT href, name, id FROM iafd_performers WHERE 1=1"
         params = []
 
         if "id" in filters:
@@ -374,7 +374,7 @@ def query_performer_hrefs(**filters):
         logging.debug(f"query sql: {sql}")
         cursor.execute(sql, params)
         #return [row[0].lower() for row in cursor.fetchall()]  # return lowercase
-        return [{'href': row[0], 'name': row[1]} for row in cursor.fetchall()]
+        return [{'href': row[0], 'name': row[1], 'id': row[2]} for row in cursor.fetchall()]
     except sqlite3.Error as e:
         logging.error(f"failed to query hrefs: {e}")
 
@@ -756,7 +756,7 @@ def query_movies(identifier):
 # query the href list by filter conditions
 def query_movie_hrefs(**filters):
     try:
-        sql = "SELECT href, title FROM iafd_movies WHERE 1=1"
+        sql = "SELECT href, title, id FROM iafd_movies WHERE 1=1"
         params = []
 
         if "id" in filters:
@@ -802,7 +802,7 @@ def query_movie_hrefs(**filters):
         logging.debug(f"query sql: {sql}")
         cursor.execute(sql, params)
         #return [row[0].lower() for row in cursor.fetchall()]  # links use lowercase
-        return [{'href': row[0], 'title': row[1]} for row in cursor.fetchall()]
+        return [{'href': row[0], 'title': row[1], 'id': row[2]} for row in cursor.fetchall()]
     except sqlite3.Error as e:
         logging.error(f"failed to query hrefs: {e}")
 
diff --git a/javdb/src/fetch.py b/javdb/src/fetch.py
index 781fb5a..cf4824e 100644
--- a/javdb/src/fetch.py
+++ b/javdb/src/fetch.py
@@ -14,6 +14,7 @@ config.setup_logging()
 
 debug = False
 force = False
+skip_local = True
 
 # fetch the actor list
 def fetch_actor_list():
@@ -236,8 +237,14 @@ def fetch_movies_detail():
     for movie in movies_list:
         url = movie['href']
         title = movie['title']
+        curr_id = movie['id']
         logging.debug(f"Fetching data for movie ({title}), url {url} ...")
         soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="video-meta-panel", attr_type="class"))
+        # page was served from the local cache; skip it
+        if skip_local and status_code == 99:
+            last_movie_id = curr_id
+            continue
+        # parse the page and write it to the database
         if soup:
             movie_data = scraper.parse_movie_detail(soup, url, title)
             if movie_data :
@@ -278,7 +285,7 @@ function_map = {
 }
 
 # main function
-def main(cmd, args_debug, args_force):
+def main(cmd, args_debug, args_force, args_skip_local):
     global debug
     debug = args_debug
     if debug:
@@ -288,13 +295,16 @@ def main(cmd, args_debug, args_force):
     global force
     force = args_force
 
+    global skip_local
+    skip_local = args_skip_local
+
     # start the task
     task_id = db_tools.insert_task_log()
     if task_id is None:
         logging.warning(f'insert task log error.')
         return None
 
-    logging.info(f'running task. id: {task_id}, debug: {debug}, force: {force}, cmd: {cmd}')
+    logging.info(f'running task. id: {task_id}, debug: {debug}, force: {force}, skip_local: {skip_local}, cmd: {cmd}')
 
     # run the specified functions
     if cmd:
@@ -328,6 +338,7 @@ if __name__ == "__main__":
     parser.add_argument("--cmd", type=str, help=f"Comma-separated list of function shortcuts: {keys_str}")
     parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
     parser.add_argument('--force', action='store_true', help='force update (true for rewrite all)')
+    parser.add_argument('--skip_local', action='store_true', help='skip URLs whose HTML is already cached locally')
     args = parser.parse_args()
 
-    main(args.cmd, args.debug, args.force)
+    main(args.cmd, args.debug, args.force, args.skip_local)
diff --git a/javdb/src/sqlite_utils.py b/javdb/src/sqlite_utils.py
index cff7943..864f798 100644
--- a/javdb/src/sqlite_utils.py
+++ b/javdb/src/sqlite_utils.py
@@ -595,7 +595,7 @@ def query_movies(identifier):
 # query the href list by filter conditions
 def query_movie_hrefs(**filters):
     try:
-        sql = "SELECT href, title FROM javdb_movies WHERE 1=1"
+        sql = "SELECT href, title, id FROM javdb_movies WHERE 1=1"
         params = []
 
         if "id" in filters:
@@ -643,7 +643,7 @@ def query_movie_hrefs(**filters):
         cursor.execute(sql, params)
 
         #return [row[0].lower() for row in cursor.fetchall()]  # links use lowercase
-        return [{'href': row[0], 'title': row[1]} for row in cursor.fetchall()]
+        return [{'href': row[0], 'title': row[1], 'id': row[2]} for row in cursor.fetchall()]
     except sqlite3.Error as e:
         logging.error(f"failed to query hrefs: {e}")
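
Note for reviewers: the patch treats scraper.fetch_page's status code 99 as "the HTML came from the local cache", and the new skip_local flag makes the detail loops record the current id and move on instead of re-parsing such pages. Below is a minimal, self-contained sketch of that loop pattern under those assumptions; fetch_page_stub and the sample rows are hypothetical stand-ins for scraper.fetch_page and the dicts returned by query_movie_hrefs, and are not part of the patch.

    CACHED_LOCALLY = 99  # status code the patch treats as "served from the local cache"

    def fetch_page_stub(url):
        # Hypothetical stand-in for scraper.fetch_page: pretend movie 2 is already cached on disk.
        cached = url.endswith("/2")
        return "<html>...</html>", (CACHED_LOCALLY if cached else 200)

    def fetch_details(rows, skip_local=True):
        last_movie_id = None
        fetched_ids = []
        for row in rows:
            html, status_code = fetch_page_stub(row["href"])
            # Same guard as the patched loops: remember the id and skip when the
            # page came from the local cache and skip_local is enabled.
            if skip_local and status_code == CACHED_LOCALLY:
                last_movie_id = row["id"]
                continue
            fetched_ids.append(row["id"])
        return fetched_ids, last_movie_id

    rows = [{"id": i, "href": f"https://example.com/movie/{i}", "title": f"t{i}"} for i in range(1, 5)]
    print(fetch_details(rows))                    # ([1, 3, 4], 2): the cached page is skipped
    print(fetch_details(rows, skip_local=False))  # ([1, 2, 3, 4], None): everything is processed

With the patch applied, the flag would be passed on the command line, e.g. python iafd/src/fetch.py --cmd <shortcut> --skip_local, where the valid --cmd shortcuts come from function_map and are not shown in these hunks.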