modify scripts

This commit is contained in:
oscarz
2025-03-18 17:45:20 +08:00
parent d5dc76b87f
commit a4ea79d4db
14 changed files with 1369 additions and 13 deletions

View File

@ -0,0 +1,64 @@
import sys
import csv
def _load_keyed_rows(path):
    """Read a tab-separated file and return ``{row[6]: row}``.

    Blank lines are ignored silently. Non-blank rows with fewer than seven
    columns have no key column, so they are reported and skipped instead of
    aborting the whole comparison. When a key occurs more than once, the
    last row wins.
    """
    rows = {}
    # newline='' is what the csv module asks for on files handed to a reader.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        for line_no, row in enumerate(csv.reader(f, delimiter='\t'), start=1):
            if not row:
                continue
            if len(row) <= 6:
                print(f"skip {path}:{line_no}: expected at least 7 columns, got {len(row)}")
                continue
            rows[row[6]] = row
    return rows


def _count_changed(new_row, old_row, threshold=100):
    """Return True when column 8 of the two rows differs by more than *threshold*.

    If either value is missing or is not an integer, fall back to a plain
    inequality test on the raw column so one odd row (for example a header
    line) cannot abort the run.
    """
    try:
        return abs(int(new_row[8]) - int(old_row[8])) > threshold
    except (IndexError, ValueError):
        return new_row[8:9] != old_row[8:9]


def compare_files(file_a, file_b):
    """
    Compare two tab-separated list files and write the differences to disk.

    Rows are keyed on column 6 (0-based; the original author's note calls it
    the "start link code"). Two files are written to the current working
    directory:

    * ``need_update.txt`` - rows of ``file_a`` whose key is missing from
      ``file_b``, or whose integer column 8 differs from the matching
      ``file_b`` row by more than 100.
    * ``old_only.txt`` - rows of ``file_b`` whose key is missing from
      ``file_a``.

    Both inputs are read completely before any output file is opened, so a
    missing or unreadable input no longer truncates results from an earlier
    run. All files are read and written as UTF-8.

    Errors are reported on stdout and swallowed, because this function is the
    top-level boundary of a command-line script.

    Args:
        file_a (str): path of the newer list.
        file_b (str): path of the older list.
    """
    try:
        data_a = _load_keyed_rows(file_a)
        data_b = _load_keyed_rows(file_b)

        with open('need_update.txt', 'w', encoding='utf-8', newline='') as f_update, \
                open('old_only.txt', 'w', encoding='utf-8', newline='') as f_b_only:
            writer_update = csv.writer(f_update, delimiter='\t')
            writer_b_only = csv.writer(f_b_only, delimiter='\t')

            # New rows, plus rows whose count moved by more than the threshold.
            for key, value in data_a.items():
                if key not in data_b or _count_changed(value, data_b[key]):
                    writer_update.writerow(value)

            # Rows that disappeared from the newer list.
            for key, value in data_b.items():
                if key not in data_a:
                    writer_b_only.writerow(value)
    except FileNotFoundError as e:
        # Name the file that is actually missing.
        print(f"文件不存在: {e.filename}")
    except csv.Error as e:
        print(f"CSV文件读取错误: {e}")
    except Exception as e:
        print(f"发生未知错误: {e}")
if __name__ == '__main__':
    # Exactly two positional arguments are expected: the new list, then the old list.
    if len(sys.argv) == 3:
        file_a, file_b = sys.argv[1:]
        compare_files(file_a, file_b)
    else:
        # Wrong argument count: print the usage text and exit with a failure status.
        for usage_line in (
            "Usage: python script.py file_a file_b",
            "file_a : 新下载的列表,通常按照更新时间排序",
            "file_b : 以前下载的列表",
            "输出 need_update.txt : 需要更新的列表",
            "输出 old_only.txt : 仅在较早下载列表中的行,通常不应该有",
        ):
            print(usage_line)
        sys.exit(1)

66
aabook/tools/tools_dir.py Normal file
View File

@ -0,0 +1,66 @@
import os
import shutil
import argparse
import re
def flatten_directory(source_dir):
    """
    Move every ``.txt`` file found in sub-directories of ``source_dir`` into
    ``source_dir`` itself, renaming it to ``[<parent dir name>]_<file name>``.

    Files already sitting directly in ``source_dir`` are left untouched.
    Previously they were renamed to ``[<source_dir name>]_<file>``, so a
    second run double-prefixed files that had already been flattened.

    Only the immediate parent directory name is encoded in the new name. An
    existing destination file with the same name is overwritten by the move.

    Args:
        source_dir: directory to flatten.
    """
    top_level = os.path.abspath(source_dir)
    for root, dirs, files in os.walk(source_dir):
        # The top level is the destination; its own files are already flat.
        if os.path.abspath(root) == top_level:
            continue
        parent_name = os.path.basename(root)
        for file in files:
            if not file.endswith('.txt'):
                continue
            src_file = os.path.join(root, file)
            dst_file = os.path.join(source_dir, f"[{parent_name}]_{file}")
            print(f'move {src_file} {dst_file}')
            shutil.move(src_file, dst_file)
def unflatten_directory(source_dir):
    """
    Sort the ``.txt`` files directly inside ``source_dir`` back into
    sub-directories, using the ``[<dir name>]_<file name>`` naming scheme.

    For each regular file named ``[X]_Y`` the directory ``source_dir/X`` is
    created if needed and the file is moved there as ``Y``. Names that do not
    follow the scheme are left alone.

    The directory part is matched non-greedily, so the name is split at the
    first ``]_``: ``[a]_[b]_c.txt`` goes to ``a/[b]_c.txt`` rather than to a
    directory called ``a]_[b``. Directory entries are skipped even if their
    name ends in ``.txt``.

    Args:
        source_dir: directory holding the flattened files.
    """
    flat_name = re.compile(r"\[(.*?)\]_(.*)")
    for file in os.listdir(source_dir):
        src_file = os.path.join(source_dir, file)
        # Only regular .txt files are candidates; never move a directory.
        if not file.endswith('.txt') or not os.path.isfile(src_file):
            continue
        match = flat_name.match(file)
        if not match:
            continue
        dir_name, filename = match.groups()
        dst_dir = os.path.join(source_dir, dir_name)
        dst_file = os.path.join(dst_dir, filename)
        # Create the target directory on demand.
        os.makedirs(dst_dir, exist_ok=True)
        print(f'move {src_file} {dst_file}')
        shutil.move(src_file, dst_file)
if __name__ == '__main__':
    # Command-line entry point: exactly one of --flatten / --unflatten must be given.
    parser = argparse.ArgumentParser(description='Flatten or unflatten a directory of txt files')
    parser.add_argument('-f', '--flatten', action='store_true', help='Flatten the directory')
    parser.add_argument('-u', '--unflatten', action='store_true', help='Unflatten the directory')
    parser.add_argument('directory', help='The directory to process')
    args = parser.parse_args()

    # SystemExit is raised directly instead of calling exit(): that helper is
    # injected by the site module for interactive use and is not guaranteed
    # to exist (for example under "python -S").
    if args.flatten and args.unflatten:
        print("Please choose either --flatten or --unflatten, not both.")
        raise SystemExit(1)

    if args.flatten:
        flatten_directory(args.directory)
    elif args.unflatten:
        unflatten_directory(args.directory)
    else:
        print("Please specify either --flatten or --unflatten.")
        raise SystemExit(1)

View File

@ -0,0 +1,61 @@
import os
def rename_files(list_file, data_dir):
    """
    Prefix ``.txt`` files under ``data_dir`` with their novel id.

    ``list_file`` is read as UTF-8 with one ``novel_id<TAB>novel_name`` pair
    per line. Every ``<novel_name>.txt`` found anywhere below ``data_dir`` is
    renamed in place to ``<novel_id>_<novel_name>.txt``.

    Blank lines in the list are ignored. Lines that do not contain exactly
    two tab-separated fields are reported and skipped; previously any such
    line raised ``ValueError`` and aborted before a single file was renamed.

    Args:
        list_file: path of the file holding novel_id / novel_name pairs.
        data_dir: directory tree whose files are renamed.
    """
    # Map novel_name -> novel_id; for duplicate names the last line wins.
    id_dict = {}
    with open(list_file, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            if len(fields) != 2:
                print(f"skip {list_file}:{line_no}: expected 2 tab-separated fields, got {len(fields)}")
                continue
            novel_id, novel_name = fields
            id_dict[novel_name] = novel_id

    for root, dirs, files in os.walk(data_dir):
        for file in files:
            if not file.endswith('.txt'):
                continue
            # File name without the ".txt" extension.
            novel_name = file[:-4]
            if novel_name not in id_dict:
                continue
            old_file = os.path.join(root, file)
            new_file = os.path.join(root, f"{id_dict[novel_name]}_{novel_name}.txt")
            os.rename(old_file, new_file)
            print(f"Renamed {old_file} to {new_file}")
def check_and_record(data_dir, search_string, output_file):
    """
    Record which ``.txt`` files under ``data_dir`` contain ``search_string``.

    Each ``.txt`` file in the tree is read as UTF-8, with undecodable bytes
    ignored. When its text contains ``search_string``, the file name without
    the extension is written to ``output_file`` on its own line and echoed to
    stdout.

    Args:
        data_dir: directory tree to scan.
        search_string: text to look for inside each file.
        output_file: path of the report file; it is overwritten.
    """
    suffix = '.txt'
    with open(output_file, 'w', encoding='utf-8') as output:
        for current_dir, _subdirs, names in os.walk(data_dir):
            for name in names:
                if not name.endswith(suffix):
                    continue
                with open(os.path.join(current_dir, name), 'r',
                          encoding='utf-8', errors='ignore') as handle:
                    content = handle.read()
                if search_string not in content:
                    continue
                # Strip the extension to recover the novel name.
                novel_name = name[:-len(suffix)]
                output.write(f"{novel_name}\n")
                print(f"need update: {novel_name}")
if __name__ == '__main__':
    # Alternative one-off task, kept for reference:
    # rename_files("aabook_down_list.txt", "data")

    # Scan the downloaded novels for the marker text and list the matching
    # names in the report file.
    output_file = "aabook_need_update.txt"
    search_string = "2005-2024 疯情书库"
    data_dir = "data"
    check_and_record(
        data_dir=data_dir,
        search_string=search_string,
        output_file=output_file,
    )