modify scripts

This commit is contained in:
2025-07-18 20:43:50 +08:00
parent 29bd03b61f
commit 1dda30cd75

View File

@ -3,6 +3,8 @@ import json
import random import random
import time import time
import logging import logging
import argparse
from collections import defaultdict
from pathlib import Path from pathlib import Path
from openai import AzureOpenAI from openai import AzureOpenAI
@ -158,6 +160,91 @@ def process_folder(folder):
time.sleep(2) # 每批之间暂停 time.sleep(2) # 每批之间暂停
# Redo logic: retry result files that have not been marked as matched.
def redo_results():
    """Re-query the model for every result file not yet marked as matched.

    Walks the ``*.json`` files in ``RESULT_DIR`` in sorted order and skips any
    whose name contains ``matched``. For each remaining file the stored
    ``req`` word list is sent again through ``build_prompt`` and
    ``call_openai_with_retry``. When the reply parses as JSON and has the same
    length as the request, the old file is removed and the new result is
    written with ``save_result``; otherwise the file is left in place and a
    warning is logged. Any other error is logged and the loop moves on to the
    next file.
    """
    for path in sorted(Path(RESULT_DIR).glob('*.json')):
        if 'matched' in path.name:
            continue

        logging.info(f"Redoing {path}")
        try:
            # Parse the numeric file-name prefix up front: if it is malformed
            # we must find out before spending an API call and, above all,
            # before the original file is deleted.
            index = int(path.name[:5])

            with open(path, 'r', encoding='utf-8') as fp:
                data = json.load(fp)

            words = data.get("req")
            if not words:
                logging.warning(f"No req in {path}")
                continue

            prompt = build_prompt(words)
            resp_text = call_openai_with_retry(prompt)
            if resp_text is None:
                logging.warning(f"Failed to get response: {path}")
                continue

            # Keep the try body minimal so that only a malformed model reply
            # is reported as "not valid JSON".
            try:
                resp_json = json.loads(resp_text)
            except json.JSONDecodeError:
                logging.warning(f"response is not valid JSON: {path}")
            else:
                if len(words) == len(resp_json):
                    logging.info(f"get correct response. rewrite file. {path}")
                    # NOTE(review): the old file is removed before the new one
                    # is written, so a failure inside save_result loses this
                    # batch. The order is kept because save_result's target
                    # path is not visible here -- confirm and swap if it
                    # writes to a different file name.
                    path.unlink()
                    # The final True presumably flags the result as matched
                    # -- TODO confirm against save_result.
                    save_result(index, words, resp_json, True)
                else:
                    logging.warning(f"response not complete: {path}, req len: {len(words)}, rsp len: {len(resp_json)}")

            time.sleep(2)  # pause between batches
        except Exception as e:
            logging.error(f"Error processing {path}: {e}")
# Check whether a word contains no repeated letters.
def has_no_repeated_letters(word):
    """Return True when no letter occurs more than once in *word*.

    Strings are compared case-insensitively, so ``"Eve"`` counts as having a
    repeated letter. Any other sized iterable is compared element by element
    without modification.
    """
    if isinstance(word, str):
        # 'E' and 'e' are the same letter; normalise before counting.
        word = word.lower()
    return len(set(word)) == len(word)
def generate_wordlist():
    """Build per-frequency word lists from the matched result files.

    Reads every file in ``RESULT_DIR`` whose name contains ``matched``, in
    ascending order of the numeric five-character file-name prefix. From each
    file's ``rsp`` list it keeps the entries that have a truthy ``w`` (word)
    and ``f`` value and whose word passes ``has_no_repeated_letters``. The
    words are grouped by their ``f`` value and each group is written, one word
    per line, to ``words_{f}.txt`` inside ``RESULT_DIR``.
    """
    # Filter BEFORE sorting: the sort key parses the numeric prefix, and
    # RESULT_DIR also holds files without one -- notably the words_*.txt files
    # this function itself writes, which would otherwise make a second run
    # fail with ValueError.
    matched_entries = []
    with os.scandir(RESULT_DIR) as entries:
        for entry in entries:
            if 'matched' not in entry.name:
                continue
            if not entry.name[:5].isdecimal():
                logging.warning(f"Skipping matched file without numeric prefix: {entry.name}")
                continue
            matched_entries.append(entry)
    matched_entries.sort(key=lambda entry: int(entry.name[:5]))

    word_map = defaultdict(list)
    for entry in matched_entries:
        with open(entry.path, 'r', encoding='utf-8') as fp:
            data = json.load(fp)

        # `or []` also covers an explicit null stored under "rsp".
        for item in data.get('rsp') or []:
            word = item.get('w')
            freq = item.get('f')
            if word and freq and has_no_repeated_letters(word):
                word_map[freq].append(word)

    # Write one output file per frequency value.
    for freq, words in word_map.items():
        filename = os.path.join(RESULT_DIR, f'words_{freq}.txt')
        with open(filename, 'w', encoding='utf-8') as out:
            out.writelines(word + '\n' for word in words)
        logging.info(f'✅ 写入完成: {filename} ({len(words)} 个单词)')
def main():
    """Parse the command-line sub-command and run the matching step.

    Accepts one positional argument ``cmd``:

    * ``init`` -- run ``process_folder`` on ``WORDS_DIR``
    * ``redo`` -- run ``redo_results``
    * ``gen``  -- run ``generate_wordlist``

    Any other value prints a usage hint and returns without doing anything.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('cmd', help='执行的命令: init / redo / gen')
    args = parser.parse_args()

    if args.cmd == 'init':
        process_folder(WORDS_DIR)
    elif args.cmd == 'redo':
        redo_results()
    elif args.cmd == 'gen':
        generate_wordlist()
    else:
        # The hint must list the commands that actually exist: the first one
        # is "init", not "all".
        print("❌ 未知命令,请使用: init / redo / gen")


if __name__ == '__main__':
    main()