modify scripts

This commit is contained in:
2025-10-17 10:59:23 +08:00
parent 9336ed0d6f
commit 4f782a638e
131 changed files with 79880 additions and 3549 deletions

View File

@ -3,9 +3,10 @@ using System.Collections.Generic;
using System.IO;
using System.Linq;
using Newtonsoft.Json.Linq;
using Unity.Sentis;
using Unity.InferenceEngine;
using UnityEngine;
using System.Text;
using System.Globalization;
using WordsToolkit.Scripts.Levels;
using WordsToolkit.Scripts.Services;
using WordsToolkit.Scripts.Services.BannedWords;
@ -36,6 +37,32 @@ namespace WordsToolkit.Scripts.NLP
// NOTE: This is now mainly for the old SaveModelBinary method - new architecture uses custom words files
private bool protectBinaryFile = false;
/// <summary>
/// Normalizes text for vocabulary lookups by lowercasing it and removing
/// combining diacritical marks (accents), so that e.g. "Café" and "cafe"
/// produce the same key.
/// </summary>
/// <param name="text">Text to normalize; may be null or empty.</param>
/// <returns>
/// The lowercased text with non-spacing marks removed, recomposed to
/// normalization form C. Null or empty input is returned unchanged.
/// </returns>
private string NormalizeText(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    // Lowercase with the invariant culture: the result is used as a dictionary key,
    // so it must not depend on the device locale (under tr-TR, ToLower() maps "I" to
    // dotless "ı", which would make keys built on one locale unreachable on another).
    // Decomposing to form D then splits each accented letter into base letter + marks.
    var decomposed = text.ToLowerInvariant().Normalize(NormalizationForm.FormD);

    var stringBuilder = new StringBuilder(decomposed.Length);
    foreach (var c in decomposed)
    {
        // Keep everything except the combining marks produced by the decomposition.
        if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
        {
            stringBuilder.Append(c);
        }
    }

    // Recompose whatever is left so the result is in canonical composed form.
    return stringBuilder.ToString().Normalize(NormalizationForm.FormC);
}
public bool IsModelLoaded(string language = null)
{
language = language ?? (languageService?.GetCurrentLanguageCode() ?? m_DefaultLanguage);
@ -82,6 +109,7 @@ namespace WordsToolkit.Scripts.NLP
public void LoadModels()
{
InitializeFromConfiguration();
foreach (var languagePair in languageModels)
{
LoadModelBin(languagePair.Key, languagePair.Value);
@ -191,29 +219,62 @@ namespace WordsToolkit.Scripts.NLP
LoadCustomWordsFromBinary(language);
}
/// <summary>
/// Reads the entire content found at <paramref name="path"/> into a byte array.
/// On Android player builds the content is fetched through UnityWebRequest;
/// on every other target it is read directly from the file system.
/// </summary>
/// <param name="path">Full path (or URL on Android) of the asset to read.</param>
/// <returns>
/// The bytes of the asset, or null when the asset is missing, the request does not
/// succeed, or an exception is thrown while reading (the exception is logged).
/// </returns>
private byte[] LoadStreamingAssetBytes(string path)
{
    try
    {
#if UNITY_ANDROID && !UNITY_EDITOR
        using (var webRequest = UnityEngine.Networking.UnityWebRequest.Get(path))
        {
            var pending = webRequest.SendWebRequest();

            // Synchronous spin until the request reports completion.
            // NOTE(review): this blocks the calling thread with no timeout — confirm
            // that is acceptable for the asset sizes involved.
            while (!pending.isDone)
            {
            }

            bool succeeded =
                webRequest.result == UnityEngine.Networking.UnityWebRequest.Result.Success;
            return succeeded ? webRequest.downloadHandler.data : null;
        }
#else
        // Direct file access; a missing file is reported as null rather than an error.
        return File.Exists(path) ? File.ReadAllBytes(path) : null;
#endif
    }
    catch (Exception loadError)
    {
        Debug.LogError($"[ModelController] Exception in LoadStreamingAssetBytes: {loadError.Message}");
        return null;
    }
}
/// <summary>
/// Loads custom words from binary file and adds them to the existing vocabulary.
/// Binary file contains ONLY custom words, not the entire model cache.
/// </summary>
private void LoadCustomWordsFromBinary(string language)
{
string path = Path.Combine(Application.dataPath, "WordsToolkit", "model",
string path = Path.Combine(Application.streamingAssetsPath, "WordConnectGameToolkit", "model",
"custom", $"{language}_custom_words.bin");
if (!File.Exists(path))
if (!wordToIndexByLanguage.ContainsKey(language))
{
return;
}
if (!wordToIndexByLanguage.ContainsKey(language))
byte[] fileData = LoadStreamingAssetBytes(path);
if (fileData == null)
{
return;
}
try
{
using var fs = new FileStream(path, FileMode.Open, FileAccess.Read);
using var br = new BinaryReader(fs, Encoding.UTF8);
using var ms = new MemoryStream(fileData);
using var br = new BinaryReader(ms, Encoding.UTF8);
// Read header
if (br.ReadInt32() != 0x43555354) // "CUST" magic number
@ -265,9 +326,11 @@ namespace WordsToolkit.Scripts.NLP
return;
}
string dir = Path.Combine(Application.dataPath, "WordsToolkit", "model", "custom");
string path = Path.Combine(dir, $"{language}_custom_words.bin");
Directory.CreateDirectory(dir);
// Create StreamingAssets folder structure
string streamingAssetsDir = Path.Combine(Application.dataPath, "StreamingAssets");
string modelDir = Path.Combine(streamingAssetsDir, "WordConnectGameToolkit", "model", "custom");
string path = Path.Combine(modelDir, $"{language}_custom_words.bin");
Directory.CreateDirectory(modelDir);
try
{
@ -353,7 +416,8 @@ namespace WordsToolkit.Scripts.NLP
foreach (var pair in wordIndexDict)
{
wordToIndex[pair.Key] = pair.Value;
string normalizedWord = NormalizeText(pair.Key);
wordToIndex[normalizedWord] = pair.Value;
}
@ -379,6 +443,7 @@ namespace WordsToolkit.Scripts.NLP
return null;
}
word = NormalizeText(word);
if (!wordToIndexByLanguage[language].ContainsKey(word))
{
return null;
@ -413,7 +478,8 @@ namespace WordsToolkit.Scripts.NLP
public bool IsWordKnown(string word, string language = null)
{
language = language ?? (languageService?.GetCurrentLanguageCode() ?? m_DefaultLanguage);
if (bannedWordsService.IsWordBanned(word, language))
string normalizedWord = NormalizeText(word);
if (bannedWordsService.IsWordBanned(normalizedWord, language))
{
return false;
}
@ -451,6 +517,8 @@ namespace WordsToolkit.Scripts.NLP
return -1f;
}
word1 = NormalizeText(word1);
word2 = NormalizeText(word2);
float[] vector1 = GetWordVector(word1, language);
float[] vector2 = GetWordVector(word2, language);
@ -466,10 +534,11 @@ namespace WordsToolkit.Scripts.NLP
if (!IsModelLoaded(language))
{
Debug.LogWarning($"[ModelController] AddWord failed  model for '{language}' not loaded.");
Debug.LogWarning($"[ModelController] AddWord failed model for '{language}' not loaded.");
return false;
}
newWord = NormalizeText(newWord);
if (wordToIndexByLanguage[language].ContainsKey(newWord))
{
Debug.LogWarning($"[ModelController] Word '{newWord}' already exists in vocab.");
@ -523,8 +592,8 @@ namespace WordsToolkit.Scripts.NLP
Buffer.BlockCopy(oldBuf, 0, newBuf, 0, oldElems * sizeof(float));
Buffer.BlockCopy(newVector,0, newBuf, oldElems * sizeof(float), dim * sizeof(float));
// Sentis requires a nongeneric NativeTensorArrayFromManagedArray
// Sentis requires (Array, bytesPerElem, length, channels)
// Inference Engine requires a non-generic NativeTensorArrayFromManagedArray
// Inference Engine requires (Array, bytesPerElem, length, channels)
// ctor args: (Array data, int srcElementOffset, int srcElementSize, int numDestElement)
var newWeights = new NativeTensorArrayFromManagedArray(
newBuf, // managed float[]
@ -638,7 +707,7 @@ namespace WordsToolkit.Scripts.NLP
if (string.IsNullOrEmpty(inputSymbols))
return new List<string>();
inputSymbols = inputSymbols.ToLower();
inputSymbols = NormalizeText(inputSymbols);
Dictionary<char, int> charCounts = new Dictionary<char, int>();
foreach (char c in inputSymbols)
{
@ -702,7 +771,8 @@ namespace WordsToolkit.Scripts.NLP
if (string.IsNullOrEmpty(inputSymbols))
return null;
var symbolSet = new HashSet<char>(inputSymbols.ToLower());
inputSymbols = NormalizeText(inputSymbols);
var symbolSet = new HashSet<char>(inputSymbols);
var bestMatches = wordToIndexByLanguage[language].Keys
.Select(word => new {
@ -855,7 +925,7 @@ namespace WordsToolkit.Scripts.NLP
/// <param name="language">Language to clear, or null to clear all</param>
public void ClearCustomWordsCache(string language = null)
{
string customDir = Path.Combine(Application.dataPath, "WordsToolkit", "model", "custom");
string customDir = Path.Combine(Application.dataPath, "StreamingAssets", "WordConnectGameToolkit", "model", "custom");
if (!Directory.Exists(customDir))
return;