modify scripts
This commit is contained in:
@ -3,6 +3,8 @@ import json
|
||||
import random
|
||||
import time
|
||||
import logging
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from openai import AzureOpenAI
|
||||
|
||||
@ -158,6 +160,91 @@ def process_folder(folder):
|
||||
|
||||
time.sleep(2) # 每批之间暂停
|
||||
|
||||
# Redo logic: retry result files that are not yet marked as matched.
def redo_results():
    """Re-request a response for every unmatched result file in RESULT_DIR.

    For each ``*.json`` file whose name does not contain ``matched``, the
    stored ``req`` word list is sent through build_prompt() and
    call_openai_with_retry() again. When the reply parses as JSON and has the
    same length as the request, the old file is deleted and the result is
    written again through save_result(); otherwise a warning is logged and the
    file is left in place. An error on one file is logged and does not stop
    the remaining files from being processed.
    """
    files = sorted(Path(RESULT_DIR).glob('*.json'))
    for f in files:
        if 'matched' in f.name:
            continue

        logging.info(f"Redoing {f}")
        try:
            with open(f, 'r', encoding='utf-8') as fp:
                data = json.load(fp)

            words = data.get("req")
            if not words:
                logging.warning(f"No req in {f}")
                continue

            prompt = build_prompt(words)
            resp_text = call_openai_with_retry(prompt)
            if resp_text is None:
                logging.warning(f"Failed to get response: {f}")
                continue

            try:
                resp_json = json.loads(resp_text)
                if len(words) == len(resp_json):
                    logging.info(f"get correct response. rewrite file. {f}")
                    # NOTE(review): the old file is removed before
                    # save_result() runs, so a failure inside save_result()
                    # loses the stored request. Confirm which file name
                    # save_result() writes to before reordering these calls.
                    f.unlink()
                    # The first five characters of the file name are parsed
                    # as the numeric index passed to save_result().
                    save_result(int(f.name[:5]), words, resp_json, True)
                else:
                    logging.warning(f"response not complete: {f}, req len: {len(words)}, rsp len: {len(resp_json)}")
            except json.JSONDecodeError:
                logging.warning(f"response is not valid JSON: {f}")

            time.sleep(2)  # pause between batches

        except Exception as e:
            # Per-file boundary: log and move on to the next file.
            logging.error(f"Error processing {f}: {e}")


# Check whether a word contains no repeated letters.
def has_no_repeated_letters(word):
    """Return True when no letter occurs more than once in *word*.

    The comparison ignores case, so "Aha" counts as repeating the letter "a".
    An empty string has no repeats and yields True.
    """
    letters = word.lower()
    return len(set(letters)) == len(letters)


def generate_wordlist():
    """Build per-frequency word lists from the matched result files.

    Scans RESULT_DIR for entries whose name contains ``matched``, reads the
    ``rsp`` list from each one in ascending order of the numeric prefix taken
    from the first five characters of the file name, and collects every entry
    whose ``w`` value passes has_no_repeated_letters(), grouped by its ``f``
    value. Each group is then written to ``words_{f}.txt`` inside RESULT_DIR,
    one word per line.
    """
    # Filter before sorting: RESULT_DIR also receives the words_*.txt files
    # written below, and their names have no numeric prefix, so applying the
    # int() sort key to every entry raised ValueError on a second run.
    matched = []
    with os.scandir(RESULT_DIR) as entries:
        for entry in entries:
            if 'matched' not in entry.name:
                continue
            try:
                index = int(entry.name[:5])
            except ValueError:
                logging.warning(f"Skipping matched file without numeric prefix: {entry.name}")
                continue
            matched.append((index, entry.path))
    matched.sort()

    word_map = defaultdict(list)
    for _, path in matched:
        with open(path, 'r', encoding='utf-8') as fp:
            data = json.load(fp)

        for item in data.get('rsp', []):
            word = item.get('w')
            freq = item.get('f')
            # Entries with a missing/empty word or a falsy frequency are dropped.
            if word and freq and has_no_repeated_letters(word):
                word_map[freq].append(word)

    # Write one output file per frequency value.
    for freq, words in word_map.items():
        filename = os.path.join(RESULT_DIR, f'words_{freq}.txt')
        with open(filename, 'w', encoding='utf-8') as out:
            out.writelines(word + '\n' for word in words)
        logging.info(f'✅ 写入完成: {filename} ({len(words)} 个单词)')


def main():
    """Parse the command-line sub-command and run the matching step.

    ``init`` runs process_folder(WORDS_DIR), ``redo`` runs redo_results() and
    ``gen`` runs generate_wordlist(). Any other value prints a message listing
    the accepted commands.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('cmd', help='执行的命令: init / redo / gen')
    args = parser.parse_args()

    if args.cmd == 'init':
        process_folder(WORDS_DIR)
    elif args.cmd == 'redo':
        redo_results()
    elif args.cmd == 'gen':
        generate_wordlist()
    else:
        # The message used to advertise "all", which is not an accepted
        # command; the accepted name is "init", as in the help text.
        print("❌ 未知命令,请使用: init / redo / gen")


if __name__ == '__main__':
    main()
Reference in New Issue
Block a user