modify scripts

2025-03-19 08:34:30 +08:00
parent 8791348490
commit 2b1266bbd2
12 changed files with 22 additions and 10754 deletions
--- a/aabook/bak/utils.py
+++ b/aabook/bak/utils.py
@@ -0,0 +1,122 @@
+import requests
+from bs4 import BeautifulSoup
+from ebooklib import epub
+import re
+import os
+import json
+import time
+import csv
+import logging
+from datetime import datetime
+import config
+
+
+def extract_create_time(input_str):
+    """Pull the first ``YYYY-MM-DD HH:MM:SS`` timestamp out of a string.
+
+    Example: "创建时间  2025-03-08 13:57:00" -> "2025-03-08 13:57:00".
+
+    Args:
+        input_str: Text that may contain a timestamp.
+
+    Returns:
+        The matched timestamp substring, or ``input_str`` unchanged when
+        no timestamp is present.
+    """
+    found = re.search(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', input_str)
+    return found.group(0) if found else input_str
+
+def extract_page_num(page_str, default_num = 0):
+    """Extract the numeric id from a page link such as "read-374864.html".
+
+    Args:
+        page_str: Text searched for the ``read-<digits>.html`` pattern.
+        default_num: Value handed back when the pattern is absent.
+
+    Returns:
+        The digits as a ``str`` when the pattern is found; otherwise
+        ``default_num`` exactly as passed (an ``int`` 0 by default), so
+        the two outcomes may differ in type.
+    """
+    hit = re.search(r'read-(\d+)\.html', page_str)
+    if hit is None:
+        return default_num
+    return hit.group(1)
+
+def extract_book_num(page_str, default_num = 0):
+    """Extract the numeric id from a book link such as "book-5549.html".
+
+    Args:
+        page_str: Text searched for the ``book-<digits>.html`` pattern.
+        default_num: Value handed back when the pattern is absent.
+
+    Returns:
+        The digits as a ``str`` when the pattern is found; otherwise
+        ``default_num`` exactly as passed (an ``int`` 0 by default), so
+        the two outcomes may differ in type.
+    """
+    hit = re.search(r'book-(\d+)\.html', page_str)
+    return default_num if hit is None else hit.group(1)
+    
+def remove_brackets_regex(input_str):
+    """Strip the square brackets from a tag such as "[都市]".
+
+    ``re.match`` anchors at the start of the string, so only input that
+    begins with "[" is affected. The result is the text inside the first
+    bracket pair; anything after the closing "]" is not returned.
+
+    Args:
+        input_str: Text that may start with a bracketed tag.
+
+    Returns:
+        The bracket contents, or ``input_str`` unchanged when it does not
+        start with a ``[...]`` group.
+    """
+    bracketed = re.match(r'\[(.*?)\]', input_str)
+    if bracketed is None:
+        return input_str
+    return bracketed.group(1)
+
+def fetch_chapter(url, timeout=10):
+    """Download a chapter page and return its text content.
+
+    Args:
+        url: Address of the chapter page.
+        timeout: Seconds to wait for the server before giving up. Without
+            a timeout ``requests.get`` can block indefinitely.
+
+    Returns:
+        The text of the ``<div class="chapter-content">`` element, or
+        ``None`` when the request fails or the page has no such element.
+    """
+    try:
+        response = requests.get(url, timeout=timeout)
+        response.raise_for_status()
+    except requests.RequestException as e:
+        print(f"请求出错: {e}")
+        return None
+
+    soup = BeautifulSoup(response.text, 'html.parser')
+    # The selector must be adapted to the structure of the real site.
+    content_node = soup.find('div', class_='chapter-content')
+    if content_node is None:
+        # find() returns None when nothing matches; report it instead of
+        # letting .get_text() raise AttributeError.
+        print(f"未找到章节内容: {url}")
+        return None
+    return content_node.get_text()
+
+
+def generate_epub(title, author, chapters, path='.'):
+    """Build an EPUB file from a list of chapters and write it to disk.
+
+    Args:
+        title: Book title; also used as the output file name
+            (``<title>.epub``).
+        author: Author name stored in the book metadata.
+        chapters: Iterable of ``(chapter_title, chapter_content)`` pairs,
+            in reading order.
+        path: Directory that receives the file. Defaults to the current
+            directory; it is created when missing.
+    """
+    book = epub.EpubBook()
+    book.set_title(title)
+    book.set_language('zh')
+    book.add_author(author)
+
+    epub_chapters = []
+    for index, (chapter_title, chapter_content) in enumerate(chapters, start=1):
+        # Name the internal files by position rather than by title:
+        # duplicate titles would otherwise collide, and titles containing
+        # characters such as "/" would produce unintended paths.
+        c = epub.EpubHtml(title=chapter_title, file_name=f'chapter_{index:04d}.xhtml', lang='zh')
+        # NOTE(review): title and content are inserted without HTML
+        # escaping; confirm whether callers pass plain text or markup.
+        c.content = f'<h1>{chapter_title}</h1><p>{chapter_content}</p>'
+        book.add_item(c)
+        epub_chapters.append(c)
+
+    # Table of contents and navigation documents.
+    book.toc = tuple(epub_chapters)
+    book.add_item(epub.EpubNcx())
+    book.add_item(epub.EpubNav())
+
+    # Stylesheet.
+    style = 'body { font-family: Times, serif; }'
+    nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
+    book.add_item(nav_css)
+
+    # Reading order: navigation page first, then the chapters.
+    book.spine = ['nav'] + epub_chapters
+
+    # Make sure the target directory exists, then write the EPUB file.
+    if path:
+        os.makedirs(path, exist_ok=True)
+    epub.write_epub(os.path.join(path, f'{title}.epub'), book, {})
+
+
+# Example usage
+if __name__ == "__main__":
+    # Placeholder links; replace them with the real chapter URLs.
+    chapter_info = [
+        ('第一章', 'https://example.com/chapter1'),
+        ('第二章', 'https://example.com/chapter2')
+    ]
+    title = '小说标题'
+    author = '小说作者'
+    # generate_epub() takes the output directory as its fourth argument;
+    # write the book into the current working directory.
+    output_dir = '.'
+
+    # Keep only the chapters whose download produced content.
+    chapters = []
+    for chapter_title, url in chapter_info:
+        content = fetch_chapter(url)
+        if content:
+            chapters.append((chapter_title, content))
+
+    if chapters:
+        generate_epub(title, author, chapters, output_dir)
+        print(f'{title}.epub 文件生成成功。')
+    else:
+        print('未获取到有效章节内容，无法生成 EPUB 文件。')
+