"""Classify English word lists with Azure OpenAI.

Scans the EOWL word-list files, collects unique words (length >= 3),
sends them to the model in batches, and asks for a JSON classification
of each word (theme, part of speech, frequency, shared-root group).
Each batch's request/response pair is saved under RESULT_DIR.
"""
import os
import json
import random
import time
import logging
from pathlib import Path
from openai import AzureOpenAI

# --- Azure OpenAI connection settings -------------------------------------
endpoint = "https://grammar.openai.azure.com/"
model_name = "gpt-4o"
deployment = "gpt4"

# SECURITY: never hardcode the subscription key in source control.
# The previous revision embedded a live key here; it must be rotated.
subscription_key = os.environ.get("AZURE_OPENAI_API_KEY", "")
api_version = "2024-12-01-preview"

client = AzureOpenAI(
    api_version=api_version,
    azure_endpoint=endpoint,
    api_key=subscription_key,
)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s: %(message)s",
    handlers=[logging.StreamHandler()]
)

# --- Paths and tunables ----------------------------------------------------
BASE_DIR = './data'
WORDS_DIR = f"{BASE_DIR}/EOWL-v1.1.2/LF Delimited Format"
RESULT_DIR = f"{BASE_DIR}/result"
os.makedirs(RESULT_DIR, exist_ok=True)
TEMP_FILE = f"{BASE_DIR}/temp_words.txt"

# Number of words sent to the model per request.
batch_words_size = 100


def find_words_files(folder):
    """Return the *.txt files in *folder* whose filename contains 'Words'."""
    return [f for f in Path(folder).glob("*.txt") if "Words" in f.name]


def collect_words(files):
    """Read every file and return the unique words of length >= 3 as a list.

    Order is unspecified (set-based dedup), matching downstream usage.
    """
    words_set = set()
    for file in files:
        with open(file, 'r', encoding='utf-8') as f:
            for line in f:
                word = line.strip()
                if len(word) >= 3:
                    words_set.add(word)
    return list(words_set)


def write_temp(words):
    """Persist *words* one-per-line to TEMP_FILE (checkpoint for batching)."""
    with open(TEMP_FILE, 'w', encoding='utf-8') as f:
        for word in words:
            f.write(word + '\n')


def read_batches(batch_size=batch_words_size):
    """Yield successive lists of up to *batch_size* words from TEMP_FILE."""
    with open(TEMP_FILE, 'r', encoding='utf-8') as f:
        words = [line.strip() for line in f if line.strip()]
    for i in range(0, len(words), batch_size):
        yield words[i:i + batch_size]


def build_prompt(words):
    """Build the classification prompt for one batch of *words*.

    The model is told to answer with a bare JSON array (no code fences),
    one object per word.
    """
    word_list = ", ".join(words)
    prompt = f"""
Please analyze the following list of English words and do the following:

1. Classify each word into a theme (like Animals, Plants, Materials, Body Parts, Clothes & Accessories, Food & Drinks, Places, Transportation, Sports, Colors, Numbers, Emotions, Tools, People & Occupations, etc.).
2. Identify the part of speech of each word (verb, noun, adjective, etc.).
3. Mark the frequency of usage of each word in everyday English as High, Medium, or Low.
4. Identify words with the same word root and group them.

Please respond with pure JSON only, without any formatting or explanations.
Each object should have the keys: word, theme, part_of_speech, frequency, same_root_group.

Here are the words:
{word_list}
"""
    return prompt


def call_openai_with_retry(prompt, retries=3, delay=5):
    """Send *prompt* to the chat model, retrying on failure.

    Returns the response text with any ```json code fence stripped,
    or None after *retries* consecutive failures.
    """
    for attempt in range(retries):
        try:
            response = client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "You are an expert English linguist and lexicographer."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=16000,
                temperature=0.7,
                top_p=1.0,
                model=deployment
            )
            text = response.choices[0].message.content.strip()
            # Strip a Markdown ```json fence if the model added one despite
            # instructions. Check both ends independently: the previous
            # text[7:-3] slice corrupted responses without a trailing fence.
            if text.startswith("```json"):
                text = text[7:]
            if text.endswith("```"):
                text = text[:-3]
            return text.strip()
        except Exception as e:
            logging.warning(f"OpenAI request failed (attempt {attempt+1}): {e}")
            time.sleep(delay)
    logging.error("OpenAI request failed after all retries.")
    return None


def save_result(index, req, resp, is_json):
    """Write one batch's request/response pair to RESULT_DIR as JSON.

    The output filename encodes the batch index, whether the response
    parsed as JSON, and whether its length matches the request's.
    """
    # Matched only when the response is JSON AND covers every requested word.
    matched = is_json and len(req) == len(resp)
    flag = "json" if is_json else "txt"
    match_str = "matched" if matched else 'notmatch'
    filename = f"{RESULT_DIR}/{str(index).zfill(5)}_{match_str}_{flag}.json"
    data = {
        'req_len': len(req),
        'rsp_len': len(resp) if is_json else 0,
        'match': matched,
        'req': req,
        'rsp': resp
    }
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    logging.info(f"Saved result to {filename}")


def process_folder(folder):
    """End-to-end pipeline: collect words, batch, classify, save results."""
    files = find_words_files(folder)
    logging.info(f"Found {len(files)} files to process.")
    words = collect_words(files)
    logging.info(f"Collected {len(words)} unique words.")
    write_temp(words)

    for idx, batch in enumerate(read_batches(), 1):
        logging.info(f"Processing batch {idx} with {len(batch)} words")
        prompt = build_prompt(batch)
        resp_text = call_openai_with_retry(prompt)

        if resp_text is None:
            save_result(idx, batch, "Failed to get response", False)
            continue

        try:
            resp_json = json.loads(resp_text)
            save_result(idx, batch, resp_json, True)
        except json.JSONDecodeError:
            logging.warning(f"Batch {idx} response is not valid JSON.")
            save_result(idx, batch, resp_text, False)

        time.sleep(2)  # pause between batches to respect rate limits


if __name__ == "__main__":
    process_folder(WORDS_DIR)