diff --git a/aabook/src/dump_book.py b/aabook/src/dump_book.py
index 6c88dd5..7725963 100644
--- a/aabook/src/dump_book.py
+++ b/aabook/src/dump_book.py
@@ -9,7 +9,7 @@ import sqlite_utils as db_tools
import scraper
import utils
import config
-import convert_utils
+from epub_tools import generate_epub
config.setup_logging()
@@ -41,16 +41,42 @@ map_books_2 = {
5240 : "幸福的借种经历",
5171 : "我的师娘和师妹",
}
+# Book id -> title for the batch currently being dumped.
+map_books_3 = {
+ 67 : "山村野香",
+}
-map_books = map_books_2
+# Mapping whose keys are dumped by default when --list is not given.
+map_books = map_books_3
# 使用示例
if __name__ == "__main__":
-    #books = [2689, 3727, 4698, 5446]
-    #books = [3167, 2985, 3098]
-    #books = [2783]
-    for book, name in map_books.items():
+    # Parse command-line arguments.
+    parser = argparse.ArgumentParser(description='Dump book data to epub files')
+    parser.add_argument('--list', type=str, default='', help='booids to dump, e.g. 2689,3727,4698')
+    parser.add_argument('--url', type=str, default='', help='URL to fetch book data')
+    args = parser.parse_args()
+
+    # --url mode: dump the single book whose href matches, then stop.
+    if args.url:
+        data = db_tools.get_contents_by_href(args.url)
+        if not data:
+            print(f"Failed to fetch data from {args.url}")
+            # Non-zero status so calling scripts can detect the failure.
+            raise SystemExit(1)
+        title = data['title']
+        href = data.get('href', '')
+        book_file = f"{books_dir}/{title}.epub"
+        generate_epub(data, book_file)
+        print(f"dump {title} - {href} suss! file: {book_file}")
+        raise SystemExit(0)
+
+    # --list mode, falling back to the ids in map_books.
+    if args.list:
+        try:
+            # Skip empty entries so "2689, 3727," is accepted.
+            book_ids = [int(bid) for bid in args.list.split(',') if bid.strip()]
+        except ValueError:
+            # parser.error prints usage and exits with status 2.
+            parser.error(f"--list expects comma-separated integer book ids, got: {args.list!r}")
+    else:
+        book_ids = map_books.keys()
+
+    for book in book_ids:
         data = db_tools.get_contents_by_book(book)
         if data:
             title = data['title']
-            convert_utils.generate_epub(data, f"{books_dir}/{title}.epub")
-            print(f"dump {title} suss!")
\ No newline at end of file
+            href = data.get('href', '')
+            book_file = f"{books_dir}/{title}.epub"
+            generate_epub(data, book_file)
+            print(f"dump {book} - {title} - {href} suss! file: {book_file}")
+        else:
+            # Report missing books instead of skipping them silently.
+            print(f"Failed to fetch data for book {book}")
\ No newline at end of file
diff --git a/aabook/src/epub_tools.py b/aabook/src/epub_tools.py
new file mode 100644
index 0000000..bdeaef9
--- /dev/null
+++ b/aabook/src/epub_tools.py
@@ -0,0 +1,68 @@
+from ebooklib import epub
+import os
+
+def generate_epub(data, save_path):
+    """Build an EPUB from a book dict and write it to ``save_path``.
+
+    ``data`` is read with ``.get`` throughout: ``title``, ``author`` and
+    ``sections``; each section provides ``title`` and ``chapters``; each
+    chapter provides ``title`` and ``content``.  Chapter content is treated
+    as plain text whose paragraphs are separated by blank lines.
+
+    A book with exactly one section becomes a flat list of chapters (the
+    section title is ignored).  Otherwise every section contributes a title
+    page followed by its chapters.  The table of contents is a flat list of
+    all pages in reading order.
+    """
+    # Function-scope import so the block does not depend on file-level changes.
+    from html import escape
+
+    book = epub.EpubBook()
+
+    # Book metadata; `or` also covers keys that are present but None/empty.
+    book.set_title(data.get('title') or '未知标题')
+    book.set_language('zh')
+    book.add_author(data.get('author') or '未知作者')
+
+    # Every page in reading order; reused for both the TOC and the spine.
+    all_chapters = []
+
+    def add_page(title, file_name, text=''):
+        # Escape title and text so '<' or '&' cannot break the XHTML.
+        body = ''.join(
+            f'<p>{escape(para)}</p>'
+            for para in text.split('\n\n')
+            if para.strip()
+        )
+        page = epub.EpubHtml(title=title, file_name=file_name, lang='zh')
+        page.content = f'<h1>{escape(title)}</h1>{body}'
+        book.add_item(page)
+        all_chapters.append(page)
+
+    sections = data.get('sections') or []
+
+    # File names are built from indices only: titles may contain characters
+    # such as '/', '?' or '#' that are not safe inside an EPUB path.
+    if len(sections) == 1:
+        # Single section: ignore its title and list the chapters directly.
+        for index, chapter in enumerate(sections[0].get('chapters') or [], start=1):
+            add_page(
+                chapter.get('title') or '未知章节',
+                f'chap_{index}.xhtml',
+                chapter.get('content') or '',
+            )
+    else:
+        # Several sections: a title page per section, then its chapters.
+        for section_index, section in enumerate(sections, start=1):
+            add_page(section.get('title') or '未知卷', f'sec_{section_index}.xhtml')
+            for chapter_index, chapter in enumerate(section.get('chapters') or [], start=1):
+                add_page(
+                    chapter.get('title') or '未知章节',
+                    f'sec_{section_index}_chap_{chapter_index}.xhtml',
+                    chapter.get('content') or '',
+                )
+
+    # Table of contents.
+    book.toc = tuple(all_chapters)
+
+    # Navigation documents.
+    book.add_item(epub.EpubNcx())
+    book.add_item(epub.EpubNav())
+
+    # Stylesheet.
+    style = 'body { font-family: Times, serif; }'
+    nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
+    book.add_item(nav_css)
+
+    # Reading order: navigation page first, then every content page.
+    book.spine = ['nav'] + all_chapters
+
+    # Write the EPUB file.
+    epub.write_epub(save_path, book, {})
+
diff --git a/aabook/src/sqlite_utils.py b/aabook/src/sqlite_utils.py
index f82b96d..5f9f480 100644
--- a/aabook/src/sqlite_utils.py
+++ b/aabook/src/sqlite_utils.py
@@ -397,7 +397,7 @@ def get_contents_by_book(id):
return None
# 查询是书否存在
- cursor.execute(f"SELECT id, name, author, category, status FROM {tbl_name_books} WHERE id= {id}")
+ cursor.execute(f"SELECT id, name, author, category, status, href FROM {tbl_name_books} WHERE id= {id}")
existing_book = cursor.fetchone()
if not existing_book: # 书不存在
logging.warning(f"book {id} have no meta data.")
@@ -408,6 +408,7 @@ def get_contents_by_book(id):
book_data['author'] = existing_book[2]
book_data['category'] = existing_book[3]
book_data['status'] = existing_book[4]
+ book_data['href'] = existing_book[5]
book_data['sections'] = []
# 组装section信息
@@ -438,6 +439,23 @@ def get_contents_by_book(id):
logging.error(f"查询 href 失败: {e}")
return None
+# Get the complete contents of a novel, looked up by href
+def get_contents_by_href(href):
+    """Look up a book by its href and return its full contents.
+
+    Returns whatever ``get_contents_by_book`` returns for the matching book
+    id, or ``None`` when no book has this href or the query fails.
+    """
+    try:
+        # Bind href as a parameter: it is caller-supplied text, and a quote
+        # in it would otherwise break (or inject into) the statement.
+        cursor.execute(f"SELECT id FROM {tbl_name_books} WHERE href = ?", (href,))
+        existing_book = cursor.fetchone()
+        if not existing_book:  # no book with this href
+            logging.warning(f"book with href {href} have no meta data.")
+            return None
+
+        return get_contents_by_book(existing_book[0])
+
+    except sqlite3.Error as e:
+        logging.error(f"查询 href 失败: {e}")
+        return None
+
# 统计信息
def get_statics():
diff --git a/aabook/src/text_to_epub.py b/aabook/src/text_to_epub.py
deleted file mode 100644
index 65709f6..0000000
--- a/aabook/src/text_to_epub.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import re
-from ebooklib import epub
-import os
-import config
-
-config.setup_logging()
-
-books_dir = f"{config.global_host_data_dir}/aabook/data"
-
-
-def txt_to_epub(txt_path, epub_path):
- # 读取 TXT 文件,使用 GB18030 编码
- with open(txt_path, 'r', encoding='gb18030') as file:
- content = file.read()
-
- # 按章节分割内容
- chapter_pattern = re.compile(r'第\d+章')
- chapter_matches = list(chapter_pattern.finditer(content))
- chapters = []
- for i in range(len(chapter_matches)):
- start = chapter_matches[i].start()
- if i < len(chapter_matches) - 1:
- end = chapter_matches[i + 1].start()
- else:
- end = len(content)
- chapter_content = content[start:end]
- chapter_title = chapter_pattern.search(chapter_content).group()
- chapters.append((chapter_title, chapter_content))
-
- # 创建 EPUB 书籍
- book = epub.EpubBook()
- book.set_title(os.path.basename(txt_path).replace('.txt', ''))
- book.set_language('zh')
-
- # 添加章节到 EPUB
- epub_chapters = []
- for title, content in chapters:
- chapter = epub.EpubHtml(title=title, file_name=f'{title}.xhtml', lang='zh')
- # 处理换行符,将换行符替换为 HTML 的
标签
- content = content.replace(title, "", 1).strip()
- content = content.replace('\r\n', '
')
- content = content.replace('\n', '
')
- chapter.content = f'{title}
{content}
'
- book.add_item(chapter)
- epub_chapters.append(chapter)
-
- # 定义书籍结构
- book.toc = tuple(epub_chapters)
- book.add_item(epub.EpubNcx())
- book.add_item(epub.EpubNav())
-
- # 定义样式
- style = 'body { font-family: Times, serif; }'
- nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
- book.add_item(nav_css)
-
- # 设置书籍的页面布局
- book.spine = ['nav'] + epub_chapters
-
- # 保存 EPUB 文件
- epub.write_epub(epub_path, book, {})
-
-
-# 使用示例
-txt_file1 = f"{books_dir}/我的青年岁月(加章节).txt"
-txt_file2 = f"{books_dir}/废都(海外版)贾平凹-加章节.txt"
-epub_file1 = f"{books_dir}/我的青年岁月(加章节).epub"
-epub_file2 = f"{books_dir}/废都(海外版)贾平凹-加章节.epub"
-
-txt_to_epub(txt_file1, epub_file1)
-txt_to_epub(txt_file2, epub_file2)