From 16fe4ec54e8cf609449967c19568815a92306c53 Mon Sep 17 00:00:00 2001
From: oscarz <oscar@vip.qq.com>
Date: Wed, 23 Apr 2025 19:18:06 +0800
Subject: [PATCH] Add uncensored listing scraper and CSV export helper

---
 javdb/src/scraper.py | 55 ++++++++++++++++++++++++++++++++++++++++++--
 javdb/src/utils.py   | 10 ++++++++
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/javdb/src/scraper.py b/javdb/src/scraper.py
index f503038..84867fb 100644
--- a/javdb/src/scraper.py
+++ b/javdb/src/scraper.py
@@ -518,6 +518,44 @@ def parse_maker_detail(soup, href):
     return list_data, next_url
 
 
+def parse_uncensored(soup, href):
+    """Parse a movie listing page into row dicts plus the next page's URL.
+
+    Returns (list_data, next_url); next_url is None unless the pagination
+    link's page number is greater than the page number of `href`.
+    """
+    div_movies = soup.find("div", class_='movie-list h cols-4 vcols-8')
+    if not div_movies:
+        logging.warning("Warning: No movies div found ")
+        return [], None
+
+    def text_of(node):
+        # A card lacking this element yields '' instead of raising AttributeError.
+        return node.text.strip() if node is not None else ''
+
+    list_data = []
+    for row in div_movies.find_all('div', class_='item'):
+        anchor = row.find('a', class_='box')
+        link = anchor.get('href') if anchor is not None else None
+        list_data.append({
+            'href': host_url + link if link else '',
+            'serial_number': text_of(row.find('strong')),
+            'title': text_of(row.find('div', class_='video-title')),
+            'release_date': text_of(row.find('div', class_='meta')),
+        })
+
+    # Only advance when the "next page" link really points past the current page.
+    next_url = None
+    next_page_element = soup.find('a', class_='pagination-next')
+    next_page_url = next_page_element.get('href') if next_page_element else None
+    if next_page_url:
+        next_page_number = url_page_num(next_page_url)
+        current_page_number = url_page_num(href) or 0
+        if next_page_number and next_page_number > current_page_number:
+            next_url = host_url + next_page_url
+    return list_data, next_url
+
+
 
 ###### 以下为测试代码 ######
 def test_actors_list():
@@ -591,12 +629,25 @@ def test_series_detail():
     print(all_data)
     
 
+def test_uncensored():
+    """Crawl the search listing page by page and write all rows to uncensored.csv."""
+    next_url = 'https://javdb.com/search?from_recent=1&q=%E6%97%A0%E7%A0%81%E6%B5%81%E5%87%BA'
+    all_data = []
+    while next_url:
+        print(f'fetching page {next_url}')
+        soup, status_code = fetch_page(next_url, partial(generic_validator, tag="div", identifier="movie-list h cols-4 vcols-8", attr_type="class"))
+        # A failed fetch ends the crawl; retrying the unchanged URL would never terminate.
+        list_data, next_url = parse_uncensored(soup, next_url) if soup else ([], None)
+        all_data.extend(list_data)
+        if not list_data:
+            print('get wrong page.')
+    utils.json_to_csv(all_data, 'uncensored.csv')
 
 if __name__ == "__main__":
     #test_actors_list()
     #test_actor()
-    test_movie_detail()
+    #test_movie_detail()
     #test_series_list()
     #test_series_detail()
-
+    test_uncensored()
     
\ No newline at end of file
diff --git a/javdb/src/utils.py b/javdb/src/utils.py
index c8e0ff9..c122e20 100644
--- a/javdb/src/utils.py
+++ b/javdb/src/utils.py
@@ -106,3 +106,13 @@ def remove_url_query(url: str) -> str:
     except Exception as e:
         print(f"解析 URL 失败: {e}")
         return url
+def json_to_csv(data, output_file):
+    """Write `data` (a list of dicts) as CSV to update_dir/output_file; empty data writes nothing."""
+    if not data:
+        return
+    # Union of keys in first-seen order, so a row with extra keys cannot raise ValueError.
+    headers = list(dict.fromkeys(key for row in data for key in row))
+    with open(f"{update_dir}/{output_file}", 'w', encoding='utf-8', newline='') as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=headers)
+        writer.writeheader()
+        writer.writerows(data)