// TBD: this is the bottleneck because so many real words call it;
// it needs to be sped up.
//
// public method
// Get one-to-one candidates for a word by edit distance, with caching:
// 1. look the lowercased word up in candMap_; on a hit, return the
//    cached set as is.
// 2. on a miss, ask CandidatesUtil1To1.GetCandidatesByEd for the raw
//    candidates (per the original notes: all combinations from insert,
//    remove, replace, switch chars; no space is included, so no split),
//    keep only those accepted by IsValid1To1Cand, and store the result
//    in candMap_ under the lowercased word.
//
// inWord:    the word to find candidates for.
// cSpellApi: supplies the max. word length and the max. cache key size,
//            and is passed through to IsValid1To1Cand.
// returns:   the set of valid one-to-one candidates (the cached instance
//            when one exists).
public static HashSet<string> GetCandidates(string inWord, CSpellApi cSpellApi)
{
    int wordMaxLength = cSpellApi.GetCanRw1To1WordMaxLength();
    // NOTE(review): ToLower() is culture-sensitive; confirm whether
    // ToLowerInvariant() is what the dictionary/cache keys expect.
    string cacheKey = inWord.ToLower();

    // 1. get it from memory to speed up the running time
    HashSet<string> cached = candMap_.GetValueOrNull(cacheKey);
    if (cached != null)
    {
        return cached;
    }

    // 2. nothing cached: generate the candidates on the fly
    // 2.1. get all possible candidates; note that the word is passed in
    // its original casing here, while validation below receives the
    // lowercased form.
    // (original profiling note: bottleneck for real-word, 7 min.)
    HashSet<string> editCands = CandidatesUtil1To1.GetCandidatesByEd(inWord, wordMaxLength);

    // 2.2. filter out those that are not valid one-to-one candidates
    // (original profiling note: bottleneck for real-word, 2 min.)
    HashSet<string> validCands = new HashSet<string>();
    foreach (string editCand in editCands)
    {
        if (!IsValid1To1Cand(cacheKey, editCand, cSpellApi))
        {
            continue;
        }
        validCands.Add(editCand);
    }

    // Update candMap_ so later calls for the same word are served from
    // memory.
    // TBD: need to enforce a max. number of keys for candMap_; the key
    // count must stay <= 2**31-1 = 2,147,483,647, and slow performance
    // or a crash could happen if there are too many keys. For now the
    // entry is still stored when the limit is exceeded; only a warning
    // is printed.
    if (!candMap_.ContainsKey(cacheKey))
    {
        candMap_[cacheKey] = validCands;

        // warning msg; suggested limit: < 1,500,000,000 for performance
        int keyLimit = cSpellApi.GetCanRw1To1CandMaxKeySize();
        int keyCount = candMap_.Keys.Count;
        bool overLimit = keyCount > keyLimit;
        // Only report on every 100th key so the log is not flooded.
        if (overLimit && (keyCount % 100) == 0)
        {
            Console.Error.WriteLine(
                "** [email protected]: the size of key in RW-1To1-Cand-HashMap is too big ("
                + keyCount + " > " + keyLimit
                + "). Please rerun the cSpell and increase the max. hash key size in the cSpell config (must < 2,147,483,647).");
        }
    }

    return validCands;
}
// public method
// Get one-to-one candidates for a word by edit distance (no caching):
// 1. ask CandidatesUtil1To1.GetCandidatesByEd for the raw candidates
//    (per the original notes: all combinations from insert, remove,
//    replace, switch chars; no space is included, so no split).
// 2. keep only the candidates accepted by IsValid1To1Cand.
//
// inWord:    the word to find candidates for; passed unchanged to both
//            the generator and the validity check.
// cSpellApi: supplies the max. word length and is passed through to
//            IsValid1To1Cand.
// returns:   a new set holding the valid one-to-one candidates.
public static HashSet<string> GetCandidates(string inWord, CSpellApi cSpellApi)
{
    int wordMaxLength = cSpellApi.GetCanNw1To1WordMaxLength();

    // Collect the survivors of the validity filter here.
    HashSet<string> validCands = new HashSet<string>();

    // Walk every edit-distance possibility and drop the invalid ones.
    foreach (string editCand in CandidatesUtil1To1.GetCandidatesByEd(inWord, wordMaxLength))
    {
        if (!IsValid1To1Cand(inWord, editCand, cSpellApi))
        {
            continue;
        }
        validCands.Add(editCand);
    }

    return validCands;
}