modify scripts

This commit is contained in:
2025-07-21 18:25:17 +08:00
parent 1dda30cd75
commit cb2db256d4

View File

@@ -1,3 +1,6 @@
'''
词库来自: https://diginoodles.com/projects/eowl
'''
import os import os
import json import json
import random import random
@@ -208,8 +211,20 @@ def generate_wordlist():
从 RESULT_DIR 下的 matched 文件中提取无重复字母的单词,并按 f 分类写入 words_{f}.txt 从 RESULT_DIR 下的 matched 文件中提取无重复字母的单词,并按 f 分类写入 words_{f}.txt
""" """
word_map = defaultdict(list) word_map = defaultdict(list)
all_words = set()
for file in sorted(os.scandir(RESULT_DIR), key=lambda f: int(f.name[:5])): # 优化写法:先筛选再排序
matched_files = []
for file in os.scandir(RESULT_DIR):
# 同上的过滤条件
if (file.is_file()
and file.name.endswith('.json')
and 'matched' in file.name
and len(file.name) >= 5
and file.name[:5].isdigit()):
matched_files.append(file)
for file in sorted(matched_files, key=lambda f: int(f.name[:5])):
if 'matched' not in file.name: if 'matched' not in file.name:
continue continue
@@ -222,6 +237,7 @@ def generate_wordlist():
freq = item.get('f') freq = item.get('f')
if word and freq and has_no_repeated_letters(word): if word and freq and has_no_repeated_letters(word):
word_map[freq].append(word) word_map[freq].append(word)
all_words.add(word)
# 写入文件 # 写入文件
for freq, words in word_map.items(): for freq, words in word_map.items():
@@ -231,6 +247,12 @@ def generate_wordlist():
f.write(word + '\n') f.write(word + '\n')
logging.info(f'✅ 写入完成: {filename} ({len(words)} 个单词)') logging.info(f'✅ 写入完成: {filename} ({len(words)} 个单词)')
# 写全量
filename = os.path.join(RESULT_DIR, 'wordlist.txt')
with open(filename, 'w', encoding='utf-8') as f:
for word in all_words:
f.write(word + '\n')
logging.info(f'✅ 写入完成: {filename} ({len(all_words)} 个单词)')
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()