From 7717ad1f60b64a6091b3b6afb8255cc52eb917d4 Mon Sep 17 00:00:00 2001
From: oscarz
Date: Sun, 30 Mar 2025 09:29:38 +0800
Subject: [PATCH] modify scripts

---
Reviewer notes (text placed between the "---" separator and the first diff
header is not part of the commit message and is skipped when the patch is
applied):

* dump_book.py: adds a module-level `map_books` dict (integer keys, title
  strings; presumably book id -> title, confirm against db_tools) and
  switches the `books` list in the `__main__` block from [4, 2600] to [2783].
* text_to_epub.py: new script. `txt_to_epub(txt_path, epub_path)` reads a
  GB18030 text file, splits it at every `第\d+章` heading, and adds one
  EpubHtml item per chapter through ebooklib before writing the EPUB.
  Text before the first heading is not included in any chapter.
* NOTE(review): the two `txt_to_epub(...)` calls at the bottom of
  text_to_epub.py are top-level statements, so they run on import.
* NOTE(review): this copy of the patch was rebuilt from a
  whitespace-collapsed dump. Line breaks follow the hunk counts (9/29 and
  0/71 lines), but the indentation, the blank context lines of the first
  hunk, and the markup inside the string literals of text_to_epub.py
  (`<br>`, `<h1>`, `<p>`) are reconstructions; verify against commit
  7717ad1f. The From: header carries no e-mail address.

 aabook/src/dump_book.py    | 22 +++++++++++-
 aabook/src/text_to_epub.py | 71 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 aabook/src/text_to_epub.py

diff --git a/aabook/src/dump_book.py b/aabook/src/dump_book.py
index e26f65f..97f87c6 100644
--- a/aabook/src/dump_book.py
+++ b/aabook/src/dump_book.py
@@ -15,9 +15,29 @@ config.setup_logging()
 
 books_dir = f"{config.global_host_data_dir}/aabook/data"
 
+map_books = {
+    380 : "多情村妇",
+    2547 : "乡村乱情|奇思妙想",
+    2261 : "宋家湾那些事儿",
+    3152 : "渔港春夜",
+    3167 : "天狗",
+    3681 : "东北大炕",
+    5524 : "淫乱竹楼",
+    5389 : "杨家洼情事",
+    2783 : "风流岁月",
+    4827 : "蛮荒小村的风流韵事",
+    1114 : "熟透了的村妇",
+    4347 : "潭河峪的那些事儿",
+    3744 : "香火",
+    4984 : "山里人家",
+
+}
+
 # 使用示例
 if __name__ == "__main__":
-    books = [4, 2600]
+    #books = [2689, 3727, 4698, 5446]
+    #books = [3167, 2985, 3098]
+    books = [2783]
     for book in books:
         data = db_tools.get_contents_by_book(book)
         if data:
diff --git a/aabook/src/text_to_epub.py b/aabook/src/text_to_epub.py
new file mode 100644
index 0000000..65709f6
--- /dev/null
+++ b/aabook/src/text_to_epub.py
@@ -0,0 +1,71 @@
+import re
+from ebooklib import epub
+import os
+import config
+
+config.setup_logging()
+
+books_dir = f"{config.global_host_data_dir}/aabook/data"
+
+
+def txt_to_epub(txt_path, epub_path):
+    # 读取 TXT 文件,使用 GB18030 编码
+    with open(txt_path, 'r', encoding='gb18030') as file:
+        content = file.read()
+
+    # 按章节分割内容
+    chapter_pattern = re.compile(r'第\d+章')
+    chapter_matches = list(chapter_pattern.finditer(content))
+    chapters = []
+    for i in range(len(chapter_matches)):
+        start = chapter_matches[i].start()
+        if i < len(chapter_matches) - 1:
+            end = chapter_matches[i + 1].start()
+        else:
+            end = len(content)
+        chapter_content = content[start:end]
+        chapter_title = chapter_pattern.search(chapter_content).group()
+        chapters.append((chapter_title, chapter_content))
+
+    # 创建 EPUB 书籍
+    book = epub.EpubBook()
+    book.set_title(os.path.basename(txt_path).replace('.txt', ''))
+    book.set_language('zh')
+
+    # 添加章节到 EPUB
+    epub_chapters = []
+    for title, content in chapters:
+        chapter = epub.EpubHtml(title=title, file_name=f'{title}.xhtml', lang='zh')
+        # 处理换行符,将换行符替换为 HTML 的 <br> 标签
+        content = content.replace(title, "", 1).strip()
+        content = content.replace('\r\n', '<br>')
+        content = content.replace('\n', '<br>')
+        chapter.content = f'<h1>{title}</h1><p>{content}</p>'
+        book.add_item(chapter)
+        epub_chapters.append(chapter)
+
+    # 定义书籍结构
+    book.toc = tuple(epub_chapters)
+    book.add_item(epub.EpubNcx())
+    book.add_item(epub.EpubNav())
+
+    # 定义样式
+    style = 'body { font-family: Times, serif; }'
+    nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
+    book.add_item(nav_css)
+
+    # 设置书籍的页面布局
+    book.spine = ['nav'] + epub_chapters
+
+    # 保存 EPUB 文件
+    epub.write_epub(epub_path, book, {})
+
+
+# 使用示例
+txt_file1 = f"{books_dir}/我的青年岁月(加章节).txt"
+txt_file2 = f"{books_dir}/废都(海外版)贾平凹-加章节.txt"
+epub_file1 = f"{books_dir}/我的青年岁月(加章节).epub"
+epub_file2 = f"{books_dir}/废都(海外版)贾平凹-加章节.epub"
+
+txt_to_epub(txt_file1, epub_file1)
+txt_to_epub(txt_file2, epub_file2)