modify scripts
This commit is contained in:
@ -3,9 +3,10 @@ using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using Newtonsoft.Json.Linq;
|
||||
using Unity.Sentis;
|
||||
using Unity.InferenceEngine;
|
||||
using UnityEngine;
|
||||
using System.Text;
|
||||
using System.Globalization;
|
||||
using WordsToolkit.Scripts.Levels;
|
||||
using WordsToolkit.Scripts.Services;
|
||||
using WordsToolkit.Scripts.Services.BannedWords;
|
||||
@ -36,6 +37,32 @@ namespace WordsToolkit.Scripts.NLP
|
||||
// NOTE: This is now mainly for the old SaveModelBinary method - new architecture uses custom words files
|
||||
private bool protectBinaryFile = false;
|
||||
|
||||
/// <summary>
/// Normalizes text by lower-casing it and removing diacritics/accents, so that
/// word matching ignores emphasis marks (e.g. "Café" becomes "cafe").
/// </summary>
/// <param name="text">The text to normalize. May be null or empty.</param>
/// <returns>
/// The lower-cased text with all non-spacing marks removed, recomposed to
/// normalization form C. A null or empty input is returned unchanged.
/// </returns>
private string NormalizeText(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    // Lower-case with the invariant culture: the result is used as a lookup key,
    // so it must not vary with the device locale (under a Turkish locale the
    // culture-sensitive ToLower() maps "I" to dotless "ı" and lookups would miss).
    string decomposed = text.ToLowerInvariant().Normalize(NormalizationForm.FormD);

    // Form D splits accented letters into base letter + combining marks;
    // dropping the non-spacing marks leaves only the base letters.
    var builder = new StringBuilder(decomposed.Length);
    foreach (char c in decomposed)
    {
        if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
        {
            builder.Append(c);
        }
    }

    // Recompose so the result is in the canonical composed form.
    return builder.ToString().Normalize(NormalizationForm.FormC);
}
|
||||
|
||||
public bool IsModelLoaded(string language = null)
|
||||
{
|
||||
language = language ?? (languageService?.GetCurrentLanguageCode() ?? m_DefaultLanguage);
|
||||
@ -82,6 +109,7 @@ namespace WordsToolkit.Scripts.NLP
|
||||
|
||||
public void LoadModels()
|
||||
{
|
||||
InitializeFromConfiguration();
|
||||
foreach (var languagePair in languageModels)
|
||||
{
|
||||
LoadModelBin(languagePair.Key, languagePair.Value);
|
||||
@ -191,29 +219,62 @@ namespace WordsToolkit.Scripts.NLP
|
||||
LoadCustomWordsFromBinary(language);
|
||||
}
|
||||
|
||||
/// <summary>
/// Loads the full contents of a file as bytes. On Android player builds the file is
/// fetched with UnityWebRequest for Android compatibility; everywhere else it is read
/// directly from disk.
/// </summary>
/// <param name="path">Path (or, on Android, request URI) of the file to load.</param>
/// <returns>
/// The file bytes, or null when the file does not exist, the request fails or times out,
/// or an exception is thrown while loading.
/// </returns>
private byte[] LoadStreamingAssetBytes(string path)
{
    try
    {
#if UNITY_ANDROID && !UNITY_EDITOR
        // Upper bound for the synchronous wait below, so a request that never
        // completes cannot hang the calling thread forever.
        const long timeoutMilliseconds = 10000;

        using var request = UnityEngine.Networking.UnityWebRequest.Get(path);
        var operation = request.SendWebRequest();
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        while (!operation.isDone)
        {
            if (stopwatch.ElapsedMilliseconds > timeoutMilliseconds)
            {
                request.Abort();
                Debug.LogError($"[ModelController] LoadStreamingAssetBytes timed out after {timeoutMilliseconds} ms: {path}");
                return null;
            }

            // Yield the CPU instead of spinning at 100% while the request runs.
            System.Threading.Thread.Sleep(1);
        }

        if (request.result == UnityEngine.Networking.UnityWebRequest.Result.Success)
        {
            return request.downloadHandler.data;
        }

        return null;
#else
        if (File.Exists(path))
        {
            return File.ReadAllBytes(path);
        }

        return null;
#endif
    }
    catch (Exception e)
    {
        Debug.LogError($"[ModelController] Exception in LoadStreamingAssetBytes: {e.Message}");
        return null;
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Loads custom words from binary file and adds them to the existing vocabulary.
|
||||
/// Binary file contains ONLY custom words, not the entire model cache.
|
||||
/// </summary>
|
||||
private void LoadCustomWordsFromBinary(string language)
|
||||
{
|
||||
string path = Path.Combine(Application.dataPath, "WordsToolkit", "model",
|
||||
string path = Path.Combine(Application.streamingAssetsPath, "WordConnectGameToolkit", "model",
|
||||
"custom", $"{language}_custom_words.bin");
|
||||
|
||||
if (!File.Exists(path))
|
||||
if (!wordToIndexByLanguage.ContainsKey(language))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (!wordToIndexByLanguage.ContainsKey(language))
|
||||
byte[] fileData = LoadStreamingAssetBytes(path);
|
||||
if (fileData == null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
using var fs = new FileStream(path, FileMode.Open, FileAccess.Read);
|
||||
using var br = new BinaryReader(fs, Encoding.UTF8);
|
||||
using var ms = new MemoryStream(fileData);
|
||||
using var br = new BinaryReader(ms, Encoding.UTF8);
|
||||
|
||||
// Read header
|
||||
if (br.ReadInt32() != 0x43555354) // "CUST" magic number
|
||||
@ -265,9 +326,11 @@ namespace WordsToolkit.Scripts.NLP
|
||||
return;
|
||||
}
|
||||
|
||||
string dir = Path.Combine(Application.dataPath, "WordsToolkit", "model", "custom");
|
||||
string path = Path.Combine(dir, $"{language}_custom_words.bin");
|
||||
Directory.CreateDirectory(dir);
|
||||
// Create StreamingAssets folder structure
|
||||
string streamingAssetsDir = Path.Combine(Application.dataPath, "StreamingAssets");
|
||||
string modelDir = Path.Combine(streamingAssetsDir, "WordConnectGameToolkit", "model", "custom");
|
||||
string path = Path.Combine(modelDir, $"{language}_custom_words.bin");
|
||||
Directory.CreateDirectory(modelDir);
|
||||
|
||||
try
|
||||
{
|
||||
@ -353,7 +416,8 @@ namespace WordsToolkit.Scripts.NLP
|
||||
|
||||
foreach (var pair in wordIndexDict)
|
||||
{
|
||||
wordToIndex[pair.Key] = pair.Value;
|
||||
string normalizedWord = NormalizeText(pair.Key);
|
||||
wordToIndex[normalizedWord] = pair.Value;
|
||||
}
|
||||
|
||||
|
||||
@ -379,6 +443,7 @@ namespace WordsToolkit.Scripts.NLP
|
||||
return null;
|
||||
}
|
||||
|
||||
word = NormalizeText(word);
|
||||
if (!wordToIndexByLanguage[language].ContainsKey(word))
|
||||
{
|
||||
return null;
|
||||
@ -413,7 +478,8 @@ namespace WordsToolkit.Scripts.NLP
|
||||
public bool IsWordKnown(string word, string language = null)
|
||||
{
|
||||
language = language ?? (languageService?.GetCurrentLanguageCode() ?? m_DefaultLanguage);
|
||||
if (bannedWordsService.IsWordBanned(word, language))
|
||||
string normalizedWord = NormalizeText(word);
|
||||
if (bannedWordsService.IsWordBanned(normalizedWord, language))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@ -451,6 +517,8 @@ namespace WordsToolkit.Scripts.NLP
|
||||
return -1f;
|
||||
}
|
||||
|
||||
word1 = NormalizeText(word1);
|
||||
word2 = NormalizeText(word2);
|
||||
float[] vector1 = GetWordVector(word1, language);
|
||||
float[] vector2 = GetWordVector(word2, language);
|
||||
|
||||
@ -466,10 +534,11 @@ namespace WordsToolkit.Scripts.NLP
|
||||
|
||||
if (!IsModelLoaded(language))
|
||||
{
|
||||
Debug.LogWarning($"[ModelController] AddWord failed – model for '{language}' not loaded.");
|
||||
Debug.LogWarning($"[ModelController] AddWord failed – model for '{language}' not loaded.");
|
||||
return false;
|
||||
}
|
||||
|
||||
newWord = NormalizeText(newWord);
|
||||
if (wordToIndexByLanguage[language].ContainsKey(newWord))
|
||||
{
|
||||
Debug.LogWarning($"[ModelController] Word '{newWord}' already exists in vocab.");
|
||||
@ -523,8 +592,8 @@ namespace WordsToolkit.Scripts.NLP
|
||||
Buffer.BlockCopy(oldBuf, 0, newBuf, 0, oldElems * sizeof(float));
|
||||
Buffer.BlockCopy(newVector,0, newBuf, oldElems * sizeof(float), dim * sizeof(float));
|
||||
|
||||
// Sentis requires a non‑generic NativeTensorArrayFromManagedArray
|
||||
// Sentis requires (Array, bytesPerElem, length, channels)
|
||||
// Inference Engine requires a non‑generic NativeTensorArrayFromManagedArray
|
||||
// Inference Engine requires (Array, bytesPerElem, length, channels)
|
||||
// ctor args: (Array data, int srcElementOffset, int srcElementSize, int numDestElement)
|
||||
var newWeights = new NativeTensorArrayFromManagedArray(
|
||||
newBuf, // managed float[]
|
||||
@ -638,7 +707,7 @@ namespace WordsToolkit.Scripts.NLP
|
||||
if (string.IsNullOrEmpty(inputSymbols))
|
||||
return new List<string>();
|
||||
|
||||
inputSymbols = inputSymbols.ToLower();
|
||||
inputSymbols = NormalizeText(inputSymbols);
|
||||
Dictionary<char, int> charCounts = new Dictionary<char, int>();
|
||||
foreach (char c in inputSymbols)
|
||||
{
|
||||
@ -702,7 +771,8 @@ namespace WordsToolkit.Scripts.NLP
|
||||
if (string.IsNullOrEmpty(inputSymbols))
|
||||
return null;
|
||||
|
||||
var symbolSet = new HashSet<char>(inputSymbols.ToLower());
|
||||
inputSymbols = NormalizeText(inputSymbols);
|
||||
var symbolSet = new HashSet<char>(inputSymbols);
|
||||
|
||||
var bestMatches = wordToIndexByLanguage[language].Keys
|
||||
.Select(word => new {
|
||||
@ -855,7 +925,7 @@ namespace WordsToolkit.Scripts.NLP
|
||||
/// <param name="language">Language to clear, or null to clear all</param>
|
||||
public void ClearCustomWordsCache(string language = null)
|
||||
{
|
||||
string customDir = Path.Combine(Application.dataPath, "WordsToolkit", "model", "custom");
|
||||
string customDir = Path.Combine(Application.dataPath, "StreamingAssets", "WordConnectGameToolkit", "model", "custom");
|
||||
|
||||
if (!Directory.Exists(customDir))
|
||||
return;
|
||||
|
||||
Reference in New Issue
Block a user