This repository has been archived on 2026-01-07. You can view files and clone it, but cannot push or open issues or pull requests.
Files
resources/aabook/bak/utils.py
2025-03-19 08:34:30 +08:00

122 lines
3.4 KiB
Python

import requests
from bs4 import BeautifulSoup
from ebooklib import epub
import re
import os
import json
import time
import csv
import logging
from datetime import datetime
import config
# Extract the timestamp from text such as "创建时间 2025-03-08 13:57:00".
def extract_create_time(input_str):
    """Return the first ``YYYY-MM-DD HH:MM:SS`` timestamp found in *input_str*.

    If the text contains no such timestamp, *input_str* is returned unchanged.
    """
    hit = re.search(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', input_str)
    if hit is None:
        return input_str
    return hit.group(0)
# Extract the numeric id from a name such as "read-374864.html".
def extract_page_num(page_str, default_num=0):
    """Return the digits of a ``read-<digits>.html`` fragment in *page_str*.

    The digits are returned as a string. When *page_str* contains no such
    fragment, *default_num* is returned as given.
    """
    hit = re.search(r'read-(\d+)\.html', page_str)
    return default_num if hit is None else hit.group(1)
# Extract the numeric id from a name such as "book-5549.html".
def extract_book_num(page_str, default_num=0):
    """Return the digits of a ``book-<digits>.html`` fragment in *page_str*.

    The digits are returned as a string. When *page_str* contains no such
    fragment, *default_num* is returned as given.
    """
    book_pattern = r'book-(\d+)\.html'
    hit = re.search(book_pattern, page_str)
    if not hit:
        return default_num
    return hit.group(1)
# Strip the square brackets from a tag such as "[都市]".
def remove_brackets_regex(input_str):
    """Return the text inside a leading ``[...]`` pair of *input_str*.

    The pattern is anchored at the start of the string (``re.match``), so
    only a bracket pair at position 0 is recognised; anything after the
    closing bracket is dropped. Strings that do not start with a bracketed
    group are returned unchanged.
    """
    hit = re.match(r'\[(.*?)\]', input_str)
    return hit.group(1) if hit else input_str
# Fetch the text of a single novel chapter.
def fetch_chapter(url, timeout=10):
    """Download *url* and return the chapter text, or ``None`` on failure.

    Args:
        url: Address of the chapter page.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the caller forever.

    Returns:
        The text of the first ``<div class="chapter-content">`` element, or
        ``None`` when the request fails or the page has no such element.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"请求出错: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    # The selector has to be adapted to the structure of the real page.
    content_node = soup.find('div', class_='chapter-content')
    if content_node is None:
        # find() returns None when nothing matches; calling get_text() on it
        # would raise AttributeError, which the handler above does not catch.
        print(f"未找到章节内容: {url}")
        return None
    return content_node.get_text()
# Build an EPUB file from the collected chapters.
def generate_epub(title, author, chapters, path):
    """Write ``<title>.epub`` into the directory *path*.

    Args:
        title: Book title; also used as the output file name.
        author: Author name stored in the book metadata.
        chapters: Iterable of ``(chapter_title, chapter_content)`` pairs, in
            reading order.
        path: Output directory. It is created if it does not exist; an empty
            value means the current working directory.

    NOTE(review): chapter titles and content are inserted into the markup
    as-is, so they are presumably already safe for XHTML; confirm against
    callers.
    """
    book = epub.EpubBook()
    book.set_title(title)
    book.set_language('zh')
    book.add_author(author)

    epub_chapters = []
    for index, (chapter_title, chapter_content) in enumerate(chapters, start=1):
        # Use a running number for the internal file name: chapter titles may
        # repeat (which would make two items share one name) or contain
        # characters such as '/' that are not valid inside an archive path.
        c = epub.EpubHtml(
            title=chapter_title,
            file_name=f'chapter_{index:04d}.xhtml',
            lang='zh',
        )
        c.content = f'<h1>{chapter_title}</h1><p>{chapter_content}</p>'
        book.add_item(c)
        epub_chapters.append(c)

    # Table of contents and the navigation documents.
    book.toc = tuple(epub_chapters)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Stylesheet.
    style = 'body { font-family: Times, serif; }'
    nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
    book.add_item(nav_css)

    # Reading order: navigation page first, then the chapters.
    book.spine = ['nav'] + epub_chapters

    # Make sure the target directory exists before writing the file.
    if path:
        os.makedirs(path, exist_ok=True)
    epub.write_epub(os.path.join(path, f'{title}.epub'), book, {})
# Example usage.
if __name__ == "__main__":
    # Replace these with the real chapter links of the novel.
    chapter_info = [
        ('第一章', 'https://example.com/chapter1'),
        ('第二章', 'https://example.com/chapter2')
    ]
    title = '小说标题'
    author = '小说作者'
    # Directory the EPUB is written to.
    output_dir = '.'

    # Keep only the chapters whose download produced some text.
    chapters = []
    for chapter_title, url in chapter_info:
        content = fetch_chapter(url)
        if content:
            chapters.append((chapter_title, content))

    if chapters:
        # generate_epub() takes the output directory as a required fourth
        # argument; omitting it raises TypeError.
        generate_epub(title, author, chapters, output_dir)
        print(f'{title}.epub 文件生成成功。')
    else:
        print('未获取到有效章节内容,无法生成 EPUB 文件。')