modify scripts

This commit is contained in:
oscarz
2025-03-19 08:34:30 +08:00
parent 8791348490
commit 2b1266bbd2
12 changed files with 22 additions and 10754 deletions

122
aabook/bak/utils.py Normal file
View File

@ -0,0 +1,122 @@
import requests
from bs4 import BeautifulSoup
from ebooklib import epub
import re
import os
import json
import time
import csv
import logging
from datetime import datetime
import config
# 从"创建时间 2025-03-08 13:57:00" 中提取时间
def extract_create_time(input_str):
    """Return the first ``YYYY-MM-DD HH:MM:SS`` timestamp found in *input_str*.

    Example: ``"创建时间 2025-03-08 13:57:00"`` yields ``"2025-03-08 13:57:00"``.
    When the text contains no such timestamp, the input is returned unchanged.
    """
    found = re.search(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', input_str)
    return found.group(0) if found else input_str
# 从 "read-374864.html" 中获取数字编号
def extract_page_num(page_str, default_num=0):
    """Pull the numeric id out of a ``read-<digits>.html`` page name.

    Returns the digits as a string (e.g. ``"374864"`` for
    ``"read-374864.html"``). If the pattern does not occur anywhere in
    *page_str*, *default_num* is returned as-is.
    """
    hit = re.search(r'read-(\d+)\.html', page_str)
    if hit is None:
        return default_num
    return hit.group(1)
# 从 "book-5549.html" 中获取数字编号
def extract_book_num(page_str, default_num=0):
    """Pull the numeric id out of a ``book-<digits>.html`` page name.

    Returns the digits as a string (e.g. ``"5549"`` for ``"book-5549.html"``).
    If the pattern does not occur anywhere in *page_str*, *default_num* is
    returned as-is.
    """
    book_match = re.search(r'book-(\d+)\.html', page_str)
    return book_match.group(1) if book_match else default_num
# 处理 [都市] 的方括号
def remove_brackets_regex(input_str):
    """Strip the square brackets from a leading ``[tag]``, e.g. ``"[都市]"`` -> ``"都市"``.

    Only a bracket group at the very start of the string is recognised
    (``re.match`` anchors there); in that case just the text inside the first
    pair of brackets is returned. Any other input is returned unchanged.
    """
    leading = re.match(r'\[(.*?)\]', input_str)
    if leading is None:
        return input_str
    return leading.group(1)
# 定义函数来抓取小说章节内容
def fetch_chapter(url, timeout=10):
    """Download one chapter page and return its text content.

    Args:
        url: Address of the chapter page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The text of the first ``<div class="chapter-content">`` element, or
        ``None`` when the request fails or the page has no such element.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"请求出错: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE: the selector has to be adapted to the structure of the real site.
    content_node = soup.find('div', class_='chapter-content')
    if content_node is None:
        # find() returns None when nothing matches; report that instead of
        # crashing with AttributeError on .get_text().
        print(f"未找到章节内容: {url}")
        return None
    return content_node.get_text()
# 定义函数来生成 EPUB 文件
def generate_epub(title, author, chapters, path='.'):
    """Build an EPUB from chapter texts and write it to ``<path>/<title>.epub``.

    Args:
        title: Book title; also used as the output file name.
        author: Author name stored in the book metadata.
        chapters: Iterable of ``(chapter_title, chapter_content)`` pairs in
            reading order. Both values are interpolated into the chapter
            markup as-is (no escaping is applied).
        path: Directory to write into; it is created if missing. Defaults to
            the current directory, and an empty value is treated the same way.
    """
    book = epub.EpubBook()
    book.set_title(title)
    book.set_language('zh')
    book.add_author(author)

    epub_chapters = []
    for index, (chapter_title, chapter_content) in enumerate(chapters, start=1):
        # Name the internal files by position rather than by title: titles may
        # repeat or contain characters that are unsafe in a file name.
        c = epub.EpubHtml(title=chapter_title, file_name=f'chapter_{index:04d}.xhtml', lang='zh')
        c.content = f'<h1>{chapter_title}</h1><p>{chapter_content}</p>'
        book.add_item(c)
        epub_chapters.append(c)

    # Table of contents and navigation files.
    book.toc = tuple(epub_chapters)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Stylesheet.
    style = 'body { font-family: Times, serif; }'
    nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
    book.add_item(nav_css)

    # Reading order: navigation page first, then the chapters.
    book.spine = ['nav'] + epub_chapters

    # Make sure the target directory exists, then write the EPUB file.
    target_dir = path or '.'
    os.makedirs(target_dir, exist_ok=True)
    epub.write_epub(os.path.join(target_dir, f'{title}.epub'), book, {})
# 示例使用
if __name__ == "__main__":
    # Example usage. Replace these with the real chapter links of the novel.
    chapter_info = [
        ('第一章', 'https://example.com/chapter1'),
        ('第二章', 'https://example.com/chapter2'),
    ]
    title = '小说标题'
    author = '小说作者'
    # Directory the EPUB is written to; handed to generate_epub as `path`
    # (the original call omitted this argument).
    output_dir = '.'

    # Keep only the chapters whose download produced some text.
    chapters = []
    for chapter_title, url in chapter_info:
        content = fetch_chapter(url)
        if content:
            chapters.append((chapter_title, content))

    if chapters:
        generate_epub(title, author, chapters, output_dir)
        print(f'{title}.epub 文件生成成功。')
    else:
        print('未获取到有效章节内容,无法生成 EPUB 文件。')