# resources/aabook/bak/utils.py

import requests
from bs4 import BeautifulSoup
from ebooklib import epub
import re
import os
import json
import time
import csv
import logging
from datetime import datetime
import config


# 从"创建时间  2025-03-08 13:57:00" 中提取时间
def extract_create_time(input_str):
    """Pull the first ``dddd-dd-dd dd:dd:dd`` timestamp out of *input_str*.

    Example: ``"创建时间  2025-03-08 13:57:00"`` yields
    ``"2025-03-08 13:57:00"``.

    Returns:
        The matched timestamp text, or *input_str* unchanged when the
        string contains no such timestamp.
    """
    found = re.search(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', input_str)
    if found is None:
        # Nothing that looks like a timestamp: hand the input back as-is.
        return input_str
    return found.group(0)

# 从 "read-374864.html" 中获取数字编号
def extract_page_num(page_str, default_num=0):
    """Extract the numeric id from a link such as ``"read-374864.html"``.

    Args:
        page_str: Text searched for the ``read-<digits>.html`` pattern.
        default_num: Value returned when the pattern is absent.

    Returns:
        The digits as a string when the pattern is found, otherwise
        *default_num* unchanged.
    """
    found = re.search(r'read-(\d+)\.html', page_str)
    return found.group(1) if found is not None else default_num

# 从 "book-5549.html" 中获取数字编号
def extract_book_num(page_str, default_num=0):
    """Extract the numeric id from a link such as ``"book-5549.html"``.

    Args:
        page_str: Text searched for the ``book-<digits>.html`` pattern.
        default_num: Value returned when the pattern is absent.

    Returns:
        The digits as a string when the pattern is found, otherwise
        *default_num* unchanged.
    """
    hit = re.search(r'book-(\d+)\.html', page_str)
    if hit is None:
        return default_num
    book_id = hit.group(1)
    return book_id

# 处理 [都市] 的方括号
def remove_brackets_regex(input_str):
    """Return the text inside a leading ``[...]`` pair, e.g. ``"[都市]"`` -> ``"都市"``.

    The pattern is anchored at the start of the string (``re.match``), and
    the capture is non-greedy, so only the first bracketed group at the very
    beginning is used. Strings that do not start with such a group are
    returned unchanged.
    """
    leading = re.match(r'\[(.*?)\]', input_str)
    return leading.group(1) if leading else input_str

# 定义函数来抓取小说章节内容
def fetch_chapter(url, timeout=10):
    """Download a chapter page and return the text of its content element.

    Args:
        url: Address of the chapter page.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``.

    Returns:
        The text of the first ``<div class="chapter-content">`` element, or
        ``None`` when the request fails or the page has no such element.
    """
    try:
        # Without a timeout, requests can wait indefinitely on a server
        # that accepts the connection but never answers.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"请求出错: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    # The selector has to be adapted to the structure of the actual site.
    content_node = soup.find('div', class_='chapter-content')
    if content_node is None:
        # find() returns None when nothing matches; report it instead of
        # letting .get_text() raise AttributeError.
        print(f"未找到章节内容: {url}")
        return None
    return content_node.get_text()


# 定义函数来生成 EPUB 文件
def generate_epub(title, author, chapters, path='.'):
    """Build an EPUB book from *chapters* and write it to ``<path>/<title>.epub``.

    Args:
        title: Book title; also used as the output file name.
        author: Author name stored in the book metadata.
        chapters: Iterable of ``(chapter_title, chapter_content)`` pairs.
            The content is placed into the chapter markup as-is.
        path: Directory that receives the file; created when missing.
            Defaults to the current directory.
    """
    book = epub.EpubBook()
    book.set_title(title)
    book.set_language('zh')
    book.add_author(author)

    epub_chapters = []
    for index, (chapter_title, chapter_content) in enumerate(chapters, start=1):
        # Name the files by position rather than by title: titles may repeat
        # or contain characters such as '/' that are not valid in a file name
        # inside the archive.
        c = epub.EpubHtml(
            title=chapter_title,
            file_name=f'chapter_{index:04d}.xhtml',
            lang='zh',
        )
        c.content = f'<h1>{chapter_title}</h1><p>{chapter_content}</p>'
        book.add_item(c)
        epub_chapters.append(c)

    # Table of contents and navigation documents.
    book.toc = tuple(epub_chapters)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Stylesheet.
    style = 'body { font-family: Times, serif; }'
    nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
    book.add_item(nav_css)

    # Reading order: navigation page first, then the chapters.
    book.spine = ['nav'] + epub_chapters

    # Write the EPUB file. An empty path means the current directory;
    # os.path.join avoids producing "/<title>.epub" in that case.
    if path:
        os.makedirs(path, exist_ok=True)
    epub.write_epub(os.path.join(path, f'{title}.epub'), book, {})


# 示例使用
if __name__ == "__main__":
    # Example usage. Replace these with the real chapter links of the novel.
    chapter_info = [
        ('第一章', 'https://example.com/chapter1'),
        ('第二章', 'https://example.com/chapter2')
    ]
    title = '小说标题'
    author = '小说作者'
    # Directory the generated EPUB is written to.
    output_dir = '.'

    chapters = []
    for chapter_title, url in chapter_info:
        content = fetch_chapter(url)
        # Skip chapters that could not be downloaded or came back empty.
        if content:
            chapters.append((chapter_title, content))

    if chapters:
        # Pass the output directory explicitly; calling generate_epub with
        # only three arguments raised TypeError.
        generate_epub(title, author, chapters, output_dir)
        print(f'{title}.epub 文件生成成功。')
    else:
        print('未获取到有效章节内容，无法生成 EPUB 文件。')