modify scripts

This commit is contained in:
2025-07-18 20:43:50 +08:00
parent 29bd03b61f
commit 1dda30cd75

View File

@ -3,6 +3,8 @@ import json
import random
import time
import logging
import argparse
from collections import defaultdict
from pathlib import Path
from openai import AzureOpenAI
@ -158,6 +160,91 @@ def process_folder(folder):
time.sleep(2) # 每批之间暂停
# redo逻辑
def redo_results():
    """Re-request results for every JSON file in RESULT_DIR that is not matched.

    Files whose name contains 'matched' are skipped. For each remaining file
    the stored "req" word list is sent again through build_prompt() and
    call_openai_with_retry(). When the reply parses as JSON and has the same
    length as the request, the old file is deleted and the result is written
    through save_result(); otherwise the file is left untouched and a warning
    is logged. Any unexpected error is logged and processing moves on to the
    next file.
    """
    for f in sorted(Path(RESULT_DIR).glob('*.json')):
        if 'matched' in f.name:
            continue
        logging.info(f"Redoing {f}")
        try:
            with open(f, 'r', encoding='utf-8') as fp:
                data = json.load(fp)
            words = data.get("req")
            if not words:
                logging.warning(f"No req in {f}")
                continue
            prompt = build_prompt(words)
            resp_text = call_openai_with_retry(prompt)
            if resp_text is None:
                logging.warning(f"Failed to get response: {f}")
                continue
            # Only the parse itself is guarded by the JSON handler.
            try:
                resp_json = json.loads(resp_text)
            except json.JSONDecodeError:
                logging.warning(f"response is not valid JSON: {f}")
            else:
                if len(words) == len(resp_json):
                    logging.info(f"get correct response. rewrite file. {f}")
                    # NOTE(review): the old file is removed before the new one
                    # is saved; if save_result() fails the request data is
                    # lost. Swapping the order is only safe if save_result()
                    # writes to a different path - confirm before changing.
                    f.unlink()
                    # The first five characters of the file name are parsed
                    # as the numeric id; the trailing True presumably marks
                    # the result as matched - verify against save_result().
                    save_result(int(f.name[:5]), words, resp_json, True)
                else:
                    logging.warning(f"response not complete: {f}, req len: {len(words)}, rsp len: {len(resp_json)}")
            time.sleep(2)  # pause between batches
        except Exception as e:
            logging.error(f"Error processing {f}: {e}")
def has_no_repeated_letters(word: str) -> bool:
    """Return True when no character occurs more than once in ``word``.

    Characters are compared exactly, so upper- and lower-case forms of the
    same letter count as different characters. An empty string has no
    repeats and therefore yields True.
    """
    return len(set(word)) == len(word)
def generate_wordlist():
    """Write per-frequency word lists from the matched result files.

    Every entry in RESULT_DIR whose name contains 'matched' is read in
    ascending order of the integer formed by the first five characters of its
    name. From each file's "rsp" list, items with a truthy "w" (word) and a
    truthy "f" value are kept when the word has no repeated letters, and are
    grouped by their "f" value. Each group is written, one word per line, to
    RESULT_DIR/words_{f}.txt.
    """
    word_map = defaultdict(list)
    # Filter before sorting: RESULT_DIR also holds files without a numeric
    # prefix (including the words_*.txt files written below), and applying
    # int() to their names would raise ValueError.
    with os.scandir(RESULT_DIR) as entries:
        matched = [entry for entry in entries if 'matched' in entry.name]
    matched.sort(key=lambda entry: int(entry.name[:5]))
    for entry in matched:
        with open(entry.path, 'r', encoding='utf-8') as fp:
            data = json.load(fp)
        for item in data.get('rsp', []):
            word = item.get('w')
            freq = item.get('f')
            if word and freq and has_no_repeated_letters(word):
                word_map[freq].append(word)
    # Write one output file per "f" value.
    for freq, words in word_map.items():
        filename = os.path.join(RESULT_DIR, f'words_{freq}.txt')
        with open(filename, 'w', encoding='utf-8') as out:
            out.writelines(word + '\n' for word in words)
        logging.info(f'✅ 写入完成: {filename} ({len(words)} 个单词)')
def main():
    """Parse the single positional command and run the matching step.

    Accepted commands:
        init - process_folder(WORDS_DIR)
        redo - redo_results()
        gen  - generate_wordlist()

    Any other value prints a usage hint listing the accepted commands.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('cmd', help='执行的命令: init / redo / gen')
    args = parser.parse_args()
    if args.cmd == 'init':
        process_folder(WORDS_DIR)
    elif args.cmd == 'redo':
        redo_results()
    elif args.cmd == 'gen':
        generate_wordlist()
    else:
        # The hint must name 'init', the command actually accepted above.
        print("❌ 未知命令,请使用: init / redo / gen")


if __name__ == '__main__':
    main()