modify scripts
This commit is contained in:
@ -3,6 +3,8 @@ import json
|
|||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
import logging
|
import logging
|
||||||
|
import argparse
|
||||||
|
from collections import defaultdict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from openai import AzureOpenAI
|
from openai import AzureOpenAI
|
||||||
|
|
||||||
@ -158,6 +160,91 @@ def process_folder(folder):
|
|||||||
|
|
||||||
time.sleep(2) # 每批之间暂停
|
time.sleep(2) # 每批之间暂停
|
||||||
|
|
||||||
|
# Redo logic: retry result files that were not marked as matched.
def redo_results():
    """Re-query the model for every non-matched result file in RESULT_DIR.

    Iterates over the ``*.json`` files in RESULT_DIR in sorted order and skips
    any whose name contains ``'matched'``. For each remaining file it reads
    the ``"req"`` word list, rebuilds the prompt with ``build_prompt`` and
    calls ``call_openai_with_retry``. When the response parses as JSON and has
    the same length as the request, the old file is deleted and the result is
    stored again through ``save_result``.

    ``build_prompt``, ``call_openai_with_retry`` and ``save_result`` are
    defined elsewhere in this file. NOTE(review): the trailing ``True`` passed
    to ``save_result`` presumably flags the result as matched - confirm
    against its definition.

    Any exception raised while handling one file is logged and the loop moves
    on to the next file.
    """
    for f in sorted(Path(RESULT_DIR).glob('*.json')):
        if 'matched' in f.name:
            continue

        logging.info(f"Redoing {f}")
        try:
            with open(f, 'r', encoding='utf-8') as fp:
                data = json.load(fp)

            words = data.get("req")
            if not words:
                logging.warning(f"No req in {f}")
                continue

            prompt = build_prompt(words)
            resp_text = call_openai_with_retry(prompt)
            if resp_text is None:
                logging.warning(f"Failed to get response: {f}")
                continue

            try:
                resp_json = json.loads(resp_text)
                if len(words) == len(resp_json):
                    logging.info(f"get correct response. rewrite file. {f}")
                    # Parse the numeric prefix BEFORE deleting the file: if the
                    # name has no valid prefix, int() raises and the original
                    # file must still be on disk.
                    index = int(f.name[:5])
                    f.unlink()
                    save_result(index, words, resp_json, True)
                else:
                    logging.warning(f"response not complete: {f}, req len: {len(words)}, rsp len: {len(resp_json)}")
            except json.JSONDecodeError:
                logging.warning(f"response is not valid JSON: {f}")

            time.sleep(2)  # pause between batches

        except Exception as e:
            logging.error(f"Error processing {f}: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
# Check whether a word contains no repeated characters.
def has_no_repeated_letters(word):
    """Return True when no character occurs more than once in ``word``.

    The comparison is exact: characters that differ only by case count as
    different characters. An empty string yields True.
    """
    seen = set()
    for ch in word:
        if ch in seen:
            return False
        seen.add(ch)
    return True
|
||||||
|
|
||||||
|
def generate_wordlist():
    """Write per-frequency word lists built from the matched result files.

    Reads every entry in RESULT_DIR whose name contains ``'matched'``, in
    ascending order of the integer formed by the first five characters of the
    file name. For each item in a file's ``"rsp"`` list, the word (``"w"``) is
    kept when both ``"w"`` and ``"f"`` are truthy and the word has no repeated
    characters. Kept words are grouped by their ``"f"`` value and written, one
    per line, to ``words_{f}.txt`` inside RESULT_DIR.
    """
    word_map = defaultdict(list)

    # Filter before sorting: RESULT_DIR also receives the words_*.txt files
    # written below, whose names have no numeric prefix. Applying the sort key
    # to them would raise ValueError the second time this function runs.
    with os.scandir(RESULT_DIR) as entries:
        matched = [entry for entry in entries if 'matched' in entry.name]
    matched.sort(key=lambda entry: int(entry.name[:5]))

    for entry in matched:
        with open(entry.path, 'r', encoding='utf-8') as fp:
            data = json.load(fp)

        for item in data.get('rsp', []):
            word = item.get('w')
            freq = item.get('f')
            if word and freq and has_no_repeated_letters(word):
                word_map[freq].append(word)

    # Write one output file per frequency value.
    for freq, words in word_map.items():
        filename = os.path.join(RESULT_DIR, f'words_{freq}.txt')
        with open(filename, 'w', encoding='utf-8') as fp:
            fp.writelines(f'{word}\n' for word in words)
        logging.info(f'✅ 写入完成: {filename} ({len(words)} 个单词)')
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse the command-line argument and run the requested task.

    The single positional argument ``cmd`` selects the task:

    * ``init`` - call ``process_folder(WORDS_DIR)``
    * ``redo`` - call ``redo_results()``
    * ``gen``  - call ``generate_wordlist()``

    Any other value prints a message listing the valid commands.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('cmd', help='执行的命令: init / redo / gen')
    args = parser.parse_args()

    if args.cmd == 'init':
        process_folder(WORDS_DIR)
    elif args.cmd == 'redo':
        redo_results()
    elif args.cmd == 'gen':
        generate_wordlist()
    else:
        # The accepted command is 'init', not 'all'; keep this hint in sync
        # with the help text and the branches above.
        print("❌ 未知命令,请使用: init / redo / gen")


if __name__ == '__main__':
    main()
|
||||||
|
|||||||
Reference in New Issue
Block a user