import csv
import html
import json
import logging
import os
import re
import time
from datetime import datetime

import requests
from bs4 import BeautifulSoup
from ebooklib import epub

import config

# Patterns are compiled once at import time; every helper below reuses them.
_CREATE_TIME_RE = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
_PAGE_NUM_RE = re.compile(r'read-(\d+)\.html')
_BOOK_NUM_RE = re.compile(r'book-(\d+)\.html')
_BRACKETS_RE = re.compile(r'\[(.*?)\]')

# Seconds to wait for the server before giving up on a chapter request.
_REQUEST_TIMEOUT = 30


def extract_create_time(input_str):
    """Return the first ``YYYY-MM-DD HH:MM:SS`` timestamp found in *input_str*.

    Example input: ``"创建时间 2025-03-08 13:57:00"``.
    When no timestamp is present the input string is returned unchanged.
    """
    match = _CREATE_TIME_RE.search(input_str)
    if match:
        return match.group(0)
    return input_str


def extract_page_num(page_str, default_num=0):
    """Return the numeric id embedded in a ``read-<id>.html`` reference.

    The id is returned as a string of digits (e.g. ``"374864"``). When the
    pattern is absent, *default_num* is returned as-is, so the result type
    follows whatever the caller supplied as the default.
    """
    match = _PAGE_NUM_RE.search(page_str)
    if match:
        return match.group(1)
    return default_num


def extract_book_num(page_str, default_num=0):
    """Return the numeric id embedded in a ``book-<id>.html`` reference.

    The id is returned as a string of digits (e.g. ``"5549"``). When the
    pattern is absent, *default_num* is returned as-is.
    """
    match = _BOOK_NUM_RE.search(page_str)
    if match:
        return match.group(1)
    return default_num


def remove_brackets_regex(input_str):
    """Strip a leading ``[...]`` wrapper, e.g. ``"[都市]"`` -> ``"都市"``.

    Only a bracket group at the very start of the string is recognised
    (``re.match`` semantics); anything else is returned unchanged.
    """
    match = _BRACKETS_RE.match(input_str)
    if match:
        return match.group(1)
    return input_str


def fetch_chapter(url):
    """Download one chapter page and return its text, or ``None`` on failure.

    ``None`` is returned both when the HTTP request fails and when the page
    does not contain the expected content container.
    """
    try:
        # A timeout keeps a stalled server from hanging the whole run.
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"请求出错: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    # The selector must be adapted to the actual structure of the target site.
    content_node = soup.find('div', class_='chapter-content')
    if content_node is None:
        # find() returns None when nothing matches; calling get_text() on it
        # would raise AttributeError, which the request handler does not catch.
        logging.warning("chapter content container not found: %s", url)
        return None
    return content_node.get_text()


def generate_epub(title, author, chapters, path='.'):
    """Build an EPUB from *chapters* and write it to ``<path>/<title>.epub``.

    Args:
        title: Book title; also used as the output file name.
        author: Author name stored in the book metadata.
        chapters: Iterable of ``(chapter_title, chapter_content)`` pairs, where
            the content is plain text.
        path: Output directory, created if missing. Defaults to the current
            directory.
    """
    book = epub.EpubBook()
    book.set_title(title)
    book.set_language('zh')
    book.add_author(author)

    epub_chapters = []
    for index, (chapter_title, chapter_content) in enumerate(chapters, start=1):
        # Index-based file names stay valid and unique even when chapter
        # titles contain path separators or repeat.
        c = epub.EpubHtml(
            title=chapter_title,
            file_name=f'chapter_{index:04d}.xhtml',
            lang='zh',
        )
        # The chapter body is plain text, so escape it before embedding it in
        # markup; a stray '&' or '<' would otherwise yield malformed XHTML.
        # NOTE(review): the original markup around the content was lost in
        # this file; a single <p> wrapper is assumed - confirm.
        c.content = f'<p>{html.escape(chapter_content)}</p>'
        book.add_item(c)
        epub_chapters.append(c)

    # Table of contents and navigation documents.
    book.toc = tuple(epub_chapters)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Stylesheet item.
    style = 'body { font-family: Times, serif; }'
    nav_css = epub.EpubItem(
        uid="style_nav",
        file_name="style/nav.css",
        media_type="text/css",
        content=style,
    )
    book.add_item(nav_css)

    # Reading order: navigation page first, then the chapters.
    book.spine = ['nav'] + epub_chapters

    # Write the EPUB file, creating the output directory when needed.
    if path:
        os.makedirs(path, exist_ok=True)
    epub.write_epub(os.path.join(path, f'{title}.epub'), book, {})


# Example usage
if __name__ == "__main__":
    # Replace these with the real chapter links of the novel.
    chapter_info = [
        ('第一章', 'https://example.com/chapter1'),
        ('第二章', 'https://example.com/chapter2'),
    ]
    title = '小说标题'
    author = '小说作者'
    output_dir = '.'

    chapters = []
    for chapter_title, url in chapter_info:
        content = fetch_chapter(url)
        if content:
            chapters.append((chapter_title, content))

    if chapters:
        generate_epub(title, author, chapters, output_dir)
        print(f'{title}.epub 文件生成成功。')
    else:
        print('未获取到有效章节内容,无法生成 EPUB 文件。')