modify scripts

2025-03-18 17:45:20 +08:00
parent d5dc76b87f
commit a4ea79d4db
14 changed files with 1369 additions and 13 deletions
--- a/aabook/tools/tools_diff.py
+++ b/aabook/tools/tools_diff.py
@ -0,0 +1,64 @@
+import sys
+import csv
+
+def compare_files(file_a, file_b):
+    """
+    Compare two tab-separated listings and write the differences to disk.
+
+    Rows are keyed by column index 6 (the original comment calls it the
+    start-link code). A row of file_a is written to need_update.txt when its
+    key is missing from file_b or when the integers in column index 8 differ
+    by more than 100. Rows whose key exists only in file_b are written to
+    old_only.txt. Both outputs are created in the current working directory.
+    Errors are reported with print() rather than raised.
+
+    Args:
+        file_a (str): path of file A
+        file_b (str): path of file B
+    """
+
+    def load_rows(path):
+        # Rows too short to hold the key column (e.g. blank lines) are skipped.
+        with open(path, 'r') as f:
+            return {row[6]: row for row in csv.reader(f, delimiter='\t') if len(row) > 6}
+
+    try:
+        # Read both inputs before opening the outputs, so a missing input
+        # does not truncate the results of an earlier run.
+        data_a = load_rows(file_a)
+        data_b = load_rows(file_b)
+
+        with open('need_update.txt', 'w', newline='') as f_update, \
+             open('old_only.txt', 'w', newline='') as f_b_only:
+            writer_update = csv.writer(f_update, delimiter='\t')
+            writer_b_only = csv.writer(f_b_only, delimiter='\t')
+
+            for key, value in data_a.items():
+                if key not in data_b:
+                    writer_update.writerow(value)
+                elif abs(int(value[8]) - int(data_b[key][8])) > 100:
+                    writer_update.writerow(value)
+
+            for key, value in data_b.items():
+                if key not in data_a:
+                    writer_b_only.writerow(value)
+
+    except FileNotFoundError:
+        print(f"文件不存在: {file_a} 或 {file_b}")
+    except csv.Error as e:
+        print(f"CSV文件读取错误: {e}")
+    except Exception as e:
+        print(f"发生未知错误: {e}")
+
+if __name__ == '__main__':
+    # Script entry: needs exactly two arguments, the new list and the old list.
+    if len(sys.argv) == 3:
+        _, file_a, file_b = sys.argv
+        compare_files(file_a, file_b)
+    else:
+        print("Usage: python script.py file_a file_b")
+        print("file_a : 新下载的列表，通常按照更新时间排序")
+        print("file_b : 以前下载的列表")
+        print("输出 need_update.txt : 需要更新的列表")
+        print("输出 old_only.txt : 仅在较早下载列表中的行，通常不应该有")
+        sys.exit(1)
--- a/aabook/tools/tools_dir.py
+++ b/aabook/tools/tools_dir.py
@ -0,0 +1,66 @@
+import os
+import shutil
+import argparse
+import re
+
+def flatten_directory(source_dir):
+    """
+    Move every .txt file found under source_dir into source_dir itself,
+    renaming it to "[<name of the directory it was in>]_<original name>".
+
+    Args:
+        source_dir: directory to walk
+    """
+    for folder, _subdirs, names in os.walk(source_dir):
+        tag = os.path.basename(folder)
+        for name in (n for n in names if n.endswith('.txt')):
+            src_file = os.path.join(folder, name)
+            dst_file = os.path.join(source_dir, f"[{tag}]_{name}")
+            print(f'move {src_file} {dst_file}')
+            shutil.move(src_file, dst_file)
+
+def unflatten_directory(source_dir):
+    """
+    Move "[<dir>]_<name>" .txt files in source_dir to source_dir/<dir>/<name>.
+
+    Args:
+        source_dir: directory holding the flattened files
+    """
+
+    for file in os.listdir(source_dir):
+        if not file.endswith('.txt'):
+            continue
+        # Greedy groups: the directory part runs up to the last "]_" in the name.
+        match = re.match(r"\[(.*)]_(.*)", file)
+        if not match:
+            # Skip files that lack the prefix; otherwise dir_name/filename are
+            # unbound or stale from the previous file and the move goes astray.
+            continue
+        dir_name, filename = match.groups()
+
+        dst_dir = os.path.join(source_dir, dir_name)
+        dst_file = os.path.join(dst_dir, filename)
+        src_file = os.path.join(source_dir, file)
+
+        os.makedirs(dst_dir, exist_ok=True)  # create the target directory on demand
+        print(f'move {src_file} {dst_file}')
+        shutil.move(src_file, dst_file)
+
+if __name__ == '__main__':
+    # Command-line entry: exactly one of --flatten / --unflatten plus a directory.
+    parser = argparse.ArgumentParser(description='Flatten or unflatten a directory of txt files')
+    parser.add_argument('-f', '--flatten', action='store_true', help='Flatten the directory')
+    parser.add_argument('-u', '--unflatten', action='store_true', help='Unflatten the directory')
+    parser.add_argument('directory', help='The directory to process')
+    args = parser.parse_args()
+
+    # SystemExit is raised directly: the bare exit() helper is injected by the
+    # site module for interactive use and is not guaranteed to exist in scripts.
+    if args.flatten and args.unflatten:
+        print("Please choose either --flatten or --unflatten, not both.")
+        raise SystemExit(1)
+    if not (args.flatten or args.unflatten):
+        print("Please specify either --flatten or --unflatten.")
+        raise SystemExit(1)
+
+    (flatten_directory if args.flatten else unflatten_directory)(args.directory)
--- a/aabook/tools/tools_other.py
+++ b/aabook/tools/tools_other.py
@ -0,0 +1,61 @@
+import os
+
+def rename_files(list_file, data_dir):
+    """
+    Prefix .txt files under data_dir with their novel id.
+
+    A file named "<novel_name>.txt" becomes "<novel_id>_<novel_name>.txt" in
+    the same directory when novel_name is listed in list_file.
+
+    Args:
+        list_file: path of a UTF-8 file with one "novel_id<TAB>novel_name" per line
+        data_dir: directory tree whose files are renamed
+    """
+
+    # Map novel_name -> novel_id; blank lines are ignored instead of raising.
+    id_dict = {}
+    with open(list_file, 'r', encoding='utf-8') as f:
+        for line in f:
+            if line.strip():
+                novel_id, novel_name = line.strip().split('\t')
+                id_dict[novel_name] = novel_id
+
+    for root, dirs, files in os.walk(data_dir):
+        for file in files:
+            novel_name = file[:-4]  # name without the ".txt" extension
+            if file.endswith('.txt') and novel_name in id_dict:
+                old_file = os.path.join(root, file)
+                new_file = os.path.join(root, f"{id_dict[novel_name]}_{novel_name}.txt")
+                os.rename(old_file, new_file)
+                print(f"Renamed {old_file} to {new_file}")
+
+
+def check_and_record(data_dir, search_string, output_file):
+    """
+    Record which .txt files under data_dir contain search_string.
+
+    Args:
+        data_dir: directory tree to scan
+        search_string: text to look for in each file
+        output_file: file that receives one matching name (without ".txt") per line
+    """
+
+    with open(output_file, 'w', encoding='utf-8') as output:
+        for root, dirs, files in os.walk(data_dir):
+            for txt_name in filter(lambda name: name.endswith('.txt'), files):
+                with open(os.path.join(root, txt_name), 'r', encoding='utf-8', errors='ignore') as f:
+                    content = f.read()
+                if search_string not in content:
+                    continue
+                stem = txt_name[:-4]
+                output.write(stem + '\n')
+                print(f"need update: {stem}")
+
+
+if __name__ == '__main__':
+    # Alternative one-off task, kept disabled:
+    # rename_files("aabook_down_list.txt", "data")
+    check_and_record(
+        data_dir="data",
+        search_string="2005-2024 疯情书库",
+        output_file="aabook_need_update.txt")