modify scripts
This commit is contained in:
122
aabook/bak/utils.py
Normal file
122
aabook/bak/utils.py
Normal file
@ -0,0 +1,122 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from ebooklib import epub
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import csv
|
||||
import logging
|
||||
from datetime import datetime
|
||||
import config
|
||||
|
||||
|
||||
# 从"创建时间 2025-03-08 13:57:00" 中提取时间
|
||||
def extract_create_time(input_str):
    """Return the first ``YYYY-MM-DD HH:MM:SS`` timestamp found in a string.

    Intended for labelled values such as a "created at" caption followed by
    ``2025-03-08 13:57:00``.

    Args:
        input_str: Text that may contain a timestamp.

    Returns:
        The matched timestamp text, or ``input_str`` unchanged when no
        timestamp is present.
    """
    found = re.search(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', input_str)
    return found.group(0) if found else input_str
|
||||
|
||||
# 从 "read-374864.html" 中获取数字编号
|
||||
def extract_page_num(page_str, default_num=0):
    """Extract the numeric id from a ``read-<digits>.html`` page name.

    Args:
        page_str: Text containing a page name such as ``read-374864.html``.
        default_num: Value returned when the pattern is not found.

    Returns:
        The digits as a string when the pattern is found, otherwise
        ``default_num`` exactly as it was passed in.
    """
    hit = re.search(r'read-(\d+)\.html', page_str)
    if hit is None:
        return default_num
    return hit.group(1)
|
||||
|
||||
# 从 "book-5549.html" 中获取数字编号
|
||||
def extract_book_num(page_str, default_num=0):
    """Extract the numeric id from a ``book-<digits>.html`` page name.

    Args:
        page_str: Text containing a page name such as ``book-5549.html``.
        default_num: Value returned when the pattern is not found.

    Returns:
        The digits as a string when the pattern is found, otherwise
        ``default_num`` exactly as it was passed in.
    """
    hit = re.search(r'book-(\d+)\.html', page_str)
    return default_num if hit is None else hit.group(1)
|
||||
|
||||
# 处理 [都市] 的方括号
|
||||
def remove_brackets_regex(input_str):
    """Return the text inside a leading ``[...]`` pair.

    The pattern is anchored at the start of the string (``re.match``), so
    only a bracket pair that opens the string is considered, and the match
    is non-greedy, so it stops at the first closing bracket.

    Args:
        input_str: Text such as a category label wrapped in square brackets.

    Returns:
        The bracketed text without the brackets, or ``input_str`` unchanged
        when the string does not start with a bracket pair.
    """
    leading = re.match(r'\[(.*?)\]', input_str)
    if leading is None:
        return input_str
    return leading.group(1)
|
||||
|
||||
# 定义函数来抓取小说章节内容
|
||||
def fetch_chapter(url, timeout=10):
    """Download a chapter page and return its text content.

    Args:
        url: Address of the chapter page.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Without it a stalled connection would block
            the caller indefinitely.

    Returns:
        The text of the first ``<div class="chapter-content">`` element, or
        ``None`` when the request fails or the page has no such element.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"请求出错: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    # The selector has to be adapted to the real page structure.
    content_node = soup.find('div', class_='chapter-content')
    if content_node is None:
        # find() returns None when nothing matches; report it instead of
        # failing with AttributeError on .get_text().
        print(f"未找到章节内容: {url}")
        return None
    return content_node.get_text()
|
||||
|
||||
|
||||
# 定义函数来生成 EPUB 文件
|
||||
def generate_epub(title, author, chapters, path='.'):
    """Build an EPUB book from plain-text chapters and write it to disk.

    Args:
        title: Book title; also used as the output file name
            (``<title>.epub``).
        author: Author name stored in the book metadata.
        chapters: Iterable of ``(chapter_title, chapter_content)`` pairs,
            where both items are plain text.
        path: Directory the file is written to. Defaults to the current
            directory; it is created if it does not exist.
    """
    import html  # local import: only needed for escaping in this function

    book = epub.EpubBook()
    book.set_title(title)
    book.set_language('zh')
    book.add_author(author)

    epub_chapters = []
    for index, (chapter_title, chapter_content) in enumerate(chapters, start=1):
        # Number-based file names: titles can repeat or contain characters
        # (such as "/") that are not usable inside the archive.
        c = epub.EpubHtml(
            title=chapter_title,
            file_name=f'chapter_{index:04d}.xhtml',
            lang='zh',
        )
        # The content is plain text, so escape markup characters and turn
        # every non-empty line into its own paragraph to keep line breaks.
        paragraphs = ''.join(
            f'<p>{html.escape(line.strip(), quote=False)}</p>'
            for line in chapter_content.splitlines()
            if line.strip()
        )
        heading = html.escape(chapter_title, quote=False)
        c.content = f'<h1>{heading}</h1>{paragraphs}'
        book.add_item(c)
        epub_chapters.append(c)

    # Table of contents and navigation documents.
    book.toc = tuple(epub_chapters)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Stylesheet.
    style = 'body { font-family: Times, serif; }'
    nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
    book.add_item(nav_css)

    # Reading order.
    book.spine = ['nav'] + epub_chapters

    # Write the EPUB file.
    if path:
        os.makedirs(path, exist_ok=True)
    epub.write_epub(os.path.join(path, f'{title}.epub'), book, {})
|
||||
|
||||
|
||||
# 示例使用
|
||||
if __name__ == "__main__":
    # Example usage. Replace these placeholders with real chapter links.
    chapter_info = [
        ('第一章', 'https://example.com/chapter1'),
        ('第二章', 'https://example.com/chapter2'),
    ]
    title = '小说标题'
    author = '小说作者'
    # Directory the generated EPUB is written to.
    output_dir = '.'

    # Keep only the chapters whose download produced some text.
    chapters = []
    for chapter_title, url in chapter_info:
        content = fetch_chapter(url)
        if content:
            chapters.append((chapter_title, content))

    if chapters:
        # Pass the output directory explicitly (generate_epub's fourth
        # parameter).
        generate_epub(title, author, chapters, output_dir)
        print(f'{title}.epub 文件生成成功。')
    else:
        print('未获取到有效章节内容,无法生成 EPUB 文件。')
|
||||
|
||||
Reference in New Issue
Block a user