Modify scripts: filter matched result files before sorting, and also write a combined wordlist.txt
This commit is contained in:
@@ -1,3 +1,6 @@
|
||||
'''
|
||||
词库来自: https://diginoodles.com/projects/eowl
|
||||
'''
|
||||
import os
|
||||
import json
|
||||
import random
|
||||
@@ -208,8 +211,20 @@ def generate_wordlist():
|
||||
从 RESULT_DIR 下的 matched 文件中提取无重复字母的单词,并按 f 分类写入 words_{f}.txt
|
||||
"""
|
||||
word_map = defaultdict(list)
|
||||
all_words = set()
|
||||
|
||||
for file in sorted(os.scandir(RESULT_DIR), key=lambda f: int(f.name[:5])):
|
||||
# 优化写法:先筛选再排序
|
||||
matched_files = []
|
||||
for file in os.scandir(RESULT_DIR):
|
||||
# 同上的过滤条件
|
||||
if (file.is_file()
|
||||
and file.name.endswith('.json')
|
||||
and 'matched' in file.name
|
||||
and len(file.name) >= 5
|
||||
and file.name[:5].isdigit()):
|
||||
matched_files.append(file)
|
||||
|
||||
for file in sorted(matched_files, key=lambda f: int(f.name[:5])):
|
||||
if 'matched' not in file.name:
|
||||
continue
|
||||
|
||||
@@ -222,6 +237,7 @@ def generate_wordlist():
|
||||
freq = item.get('f')
|
||||
if word and freq and has_no_repeated_letters(word):
|
||||
word_map[freq].append(word)
|
||||
all_words.add(word)
|
||||
|
||||
# 写入文件
|
||||
for freq, words in word_map.items():
|
||||
@@ -231,6 +247,12 @@ def generate_wordlist():
|
||||
f.write(word + '\n')
|
||||
logging.info(f'✅ 写入完成: {filename} ({len(words)} 个单词)')
|
||||
|
||||
# 写全量
|
||||
filename = os.path.join(RESULT_DIR, 'wordlist.txt')
|
||||
with open(filename, 'w', encoding='utf-8') as f:
|
||||
for word in all_words:
|
||||
f.write(word + '\n')
|
||||
logging.info(f'✅ 写入完成: {filename} ({len(all_words)} 个单词)')
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
Reference in New Issue
Block a user