/// <summary>
///     Builds every word form that the affix keys of a base word produce
/// </summary>
/// <param name="word" type="NetSpell.SpellChecker.Dictionary.Word">
///     <para>
///         The base word whose <c>AffixKeys</c> are applied to its <c>Text</c>
///     </para>
/// </param>
/// <returns>
///     A System.Collections.ArrayList of strings: the prefixed and
///     non-combinable suffixed forms first, followed by the base word and
///     its combinable suffixed forms
/// </returns>
public ArrayList ExpandWord(Word word)
{
    string baseText = word.Text;

    // forms that prefixes may still be applied to (base word included)
    ArrayList combinable = new ArrayList();
    combinable.Add(baseText);

    // forms that are final as soon as they are produced
    ArrayList expanded = new ArrayList();

    // keys that turned out to name prefix rules, in the order encountered
    string pendingPrefixKeys = "";

    // pass 1: suffix rules; a key is only treated as a prefix key when it
    // does not name a suffix rule
    foreach (char affixKey in word.AffixKeys)
    {
        string keyName = affixKey.ToString(CultureInfo.CurrentUICulture);

        if (!_suffixRules.ContainsKey(keyName))
        {
            if (_prefixRules.ContainsKey(keyName))
            {
                pendingPrefixKeys += keyName;
            }
            continue;
        }

        AffixRule suffixRule = _suffixRules[keyName];
        string suffixed = AffixUtility.AddSuffix(baseText, suffixRule);
        if (suffixed == baseText)
        {
            // the rule left the word unchanged; nothing to record
            continue;
        }

        if (suffixRule.AllowCombine)
        {
            combinable.Add(suffixed);
        }
        else
        {
            expanded.Add(suffixed);
        }
    }

    // pass 2: every collected prefix rule against every combinable form
    foreach (char prefixKey in pendingPrefixKeys)
    {
        AffixRule prefixRule = _prefixRules[prefixKey.ToString(CultureInfo.CurrentUICulture)];

        foreach (string stem in combinable)
        {
            string prefixed = AffixUtility.AddPrefix(stem, prefixRule);
            if (prefixed != stem)
            {
                expanded.Add(prefixed);
            }
        }
    }

    expanded.AddRange(combinable);

    TraceWriter.TraceVerbose("Word Expanded: {0}; Child Words: {1}", baseText, expanded.Count);
    return expanded;
}
/// <summary>
///     Initializes the dictionary by loading and parsing the
///     dictionary file and the user file.
/// </summary>
/// <remarks>
///     The file is read line by line as UTF-8. A line that is exactly one of
///     <c>[Copyright]</c>, <c>[Try]</c>, <c>[Replace]</c>, <c>[Prefix]</c>,
///     <c>[Suffix]</c>, <c>[Phonetic]</c> or <c>[Words]</c> switches the
///     current section; every other non-empty line is parsed according to
///     the section it appears in. The file is closed even when parsing
///     throws.
/// </remarks>
public void Initialize()
{
    // clean up data first
    _baseWords.Clear();
    _replaceCharacters.Clear();
    _prefixRules.Clear();
    _suffixRules.Clear();
    _phoneticRules.Clear();
    _tryCharacters = "";
    // NOTE(review): Copyright is appended to below but is not reset here;
    // confirm whether repeated Initialize() calls are expected.

    // the following is used to split a line by white space
    Regex _spaceRegx = new Regex(@"[^\s]+", RegexOptions.Compiled);
    MatchCollection partMatches;

    string currentSection = "";
    AffixRule currentRule = null;
    string dictionaryPath = Path.Combine(_dictionaryFolder, _dictionaryFile);

    TraceWriter.TraceInfo("Loading Dictionary:{0}", dictionaryPath);

    // open dictionary file; the using blocks guarantee the handle is
    // released when a malformed line makes the parser throw
    using (FileStream fs = new FileStream(dictionaryPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (StreamReader sr = new StreamReader(fs, Encoding.UTF8))
    {
        // read line by line
        while (sr.Peek() >= 0)
        {
            string tempLine = sr.ReadLine().Trim();
            if (tempLine.Length > 0)
            {
                // check for section flag
                switch (tempLine)
                {
                    case "[Copyright]" :
                    case "[Try]" :
                    case "[Replace]" :
                    case "[Prefix]" :
                    case "[Suffix]" :
                    case "[Phonetic]" :
                    case "[Words]" :
                        // set current section that is being parsed
                        currentSection = tempLine;
                        break;
                    default :
                        // parse line and place in correct object
                        switch (currentSection)
                        {
                            case "[Copyright]" :
                                this.Copyright += tempLine + "\r\n";
                                break;
                            case "[Try]" : // ISpell try chars
                                this.TryCharacters += tempLine;
                                break;
                            case "[Replace]" : // ISpell replace chars
                                this.ReplaceCharacters.Add(tempLine);
                                break;
                            case "[Prefix]" : // MySpell prefix rules
                            case "[Suffix]" : // MySpell suffix rules

                                // split line by white space
                                partMatches = _spaceRegx.Matches(tempLine);

                                // if 3 parts, then new rule
                                if (partMatches.Count == 3)
                                {
                                    currentRule = new AffixRule();

                                    // part 1 = affix key
                                    currentRule.Name = partMatches[0].Value;
                                    // part 2 = combine flag
                                    if (partMatches[1].Value == "Y")
                                    {
                                        currentRule.AllowCombine = true;
                                    }
                                    // part 3 = entry count, not used

                                    if (currentSection == "[Prefix]")
                                    {
                                        // add to prefix collection
                                        this.PrefixRules.Add(currentRule.Name, currentRule);
                                    }
                                    else
                                    {
                                        // add to suffix collection
                                        this.SuffixRules.Add(currentRule.Name, currentRule);
                                    }
                                }
                                // if 4 parts, then entry for current rule
                                else if (partMatches.Count == 4)
                                {
                                    // part 1 = affix key; an entry line that shows up
                                    // before any rule header has no rule to attach to
                                    // and is skipped instead of dereferencing null
                                    if (currentRule != null && currentRule.Name == partMatches[0].Value)
                                    {
                                        AffixEntry entry = new AffixEntry();

                                        // part 2 = strip char
                                        if (partMatches[1].Value != "0")
                                        {
                                            entry.StripCharacters = partMatches[1].Value;
                                        }
                                        // part 3 = add chars
                                        entry.AddCharacters = partMatches[2].Value;
                                        // part 4 = conditions
                                        AffixUtility.EncodeConditions(partMatches[3].Value, entry);

                                        currentRule.AffixEntries.Add(entry);
                                    }
                                }
                                break;
                            case "[Phonetic]" : // ASpell phonetic rules
                                // split line by white space
                                partMatches = _spaceRegx.Matches(tempLine);
                                if (partMatches.Count >= 2)
                                {
                                    PhoneticRule rule = new PhoneticRule();
                                    PhoneticUtility.EncodeRule(partMatches[0].Value, ref rule);
                                    rule.ReplaceString = partMatches[1].Value;
                                    _phoneticRules.Add(rule);
                                }
                                break;
                            case "[Words]" : // dictionary word list
                                // splits word into its parts
                                string[] parts = tempLine.Split('/');
                                Word tempWord = new Word();
                                // part 1 = base word
                                tempWord.Text = parts[0];
                                // part 2 = affix keys
                                if (parts.Length >= 2)
                                {
                                    tempWord.AffixKeys = parts[1];
                                }
                                // part 3 = phonetic code
                                if (parts.Length >= 3)
                                {
                                    tempWord.PhoneticCode = parts[2];
                                }

                                this.BaseWords.Add(tempWord.Text, tempWord);
                                break;
                        } // currentSection switch
                        break;
                } // tempLine switch
            } // if tempLine
        } // read line
    } // reader and stream are disposed here

    TraceWriter.TraceInfo("Dictionary Loaded BaseWords:{0}; PrefixRules:{1}; SuffixRules:{2}; PhoneticRules:{3}",
        this.BaseWords.Count, this.PrefixRules.Count, this.SuffixRules.Count, this.PhoneticRules.Count);

    this.LoadUserFile();

    _initialized = true;
}
/// <summary>
///     Populates the <see cref="Suggestions"/> property with word suggestions
///     for the <see cref="CurrentWord"/>
/// </summary>
/// <remarks>
///     <para>
///         <see cref="TestWord"/> must have been called before calling this method
///     </para>
///     <para>
///         Depending on the suggestion mode, candidates come from a phonetic
///         lookup against the dictionary's base words, from the near-miss
///         generators, or from both. Candidates are sorted, de-duplicated and
///         truncated to the maximum suggestion count when that count is positive.
///     </para>
/// </remarks>
/// <seealso cref="CurrentWord"/>
/// <seealso cref="Suggestions"/>
/// <seealso cref="TestWord"/>
public void Suggest()
{
    // nothing to suggest for without a current word
    if (this.CurrentWord.Length == 0)
    {
        TraceWriter.TraceWarning("No current word");
        return;
    }

    this.Initialize();

    ArrayList candidates = new ArrayList();

    bool phoneticRequested = _suggestionMode == SuggestionEnum.PhoneticNearMiss
        || _suggestionMode == SuggestionEnum.Phonetic;

    if (phoneticRequested && _dictionary.PhoneticRules.Count > 0)
    {
        // collect the distinct, non-empty phonetic codes of the possible root words
        Hashtable codes = new Hashtable();
        foreach (string baseWord in _dictionary.PossibleBaseWords)
        {
            string code = _dictionary.PhoneticCode(baseWord);
            if (code.Length == 0 || codes.ContainsKey(code))
            {
                continue;
            }
            codes.Add(code, code);
        }

        if (codes.Count > 0)
        {
            // every dictionary root sharing one of those codes contributes
            // all of its expanded forms
            foreach (Word root in _dictionary.BaseWords.Values)
            {
                if (!codes.ContainsKey(root.PhoneticCode))
                {
                    continue;
                }

                foreach (string expandedWord in _dictionary.ExpandWord(root))
                {
                    Word candidate = new Word();
                    candidate.Text = expandedWord;
                    candidate.EditDistance = this.EditDistance(this.CurrentWord, expandedWord);
                    candidates.Add(candidate);
                }
            }
        }

        TraceWriter.TraceVerbose("Suggestiongs Found with Phonetic Stratagy: {0}", candidates.Count);
    }

    bool nearMissRequested = _suggestionMode == SuggestionEnum.PhoneticNearMiss
        || _suggestionMode == SuggestionEnum.NearMiss;

    if (nearMissRequested)
    {
        // multi-letter substitutions taken from the replace table
        this.ReplaceChars(ref candidates);

        // swap out each char one by one and try all the tryme
        // chars in its place to see if that makes a good word
        this.BadChar(ref candidates);

        // try omitting one char of word at a time
        this.ExtraChar(ref candidates);

        // try inserting a tryme character before every letter
        this.ForgotChar(ref candidates);

        // split the string into two pieces after every char
        // if both pieces are good words make them a suggestion
        this.TwoWords(ref candidates);

        // try swapping adjacent chars one by one
        this.SwapChar(ref candidates);
    }

    TraceWriter.TraceVerbose("Total Suggestiongs Found: {0}", candidates.Count);

    // ordering comes from Word's own comparison
    // (presumably edit distance; confirm against Word.CompareTo)
    candidates.Sort();

    _suggestions.Clear();

    foreach (Word ranked in candidates)
    {
        string text = ranked.Text;

        // keep only the first occurrence of each suggestion text
        if (!_suggestions.Contains(text))
        {
            _suggestions.Add(text);
        }

        // a non-positive maximum means "no limit"
        if (_maxSuggestions > 0 && _suggestions.Count >= _maxSuggestions)
        {
            break;
        }
    }
}
/// <summary>
///     Wraps <paramref name="word"/> in a <c>Word</c> carrying its edit distance
///     from <c>CurrentWord</c> and appends it to <paramref name="tempSuggestion"/>
/// </summary>
/// <param name="word">The suggested text</param>
/// <param name="tempSuggestion">The list the new suggestion is appended to</param>
private void SuggestWord(string word, List<Word> tempSuggestion)
{
    tempSuggestion.Add(new Word
    {
        Text = word,
        EditDistance = this.EditDistance(this.CurrentWord, word)
    });
}
/// <summary>
///     Near-miss generator: exchanges each pair of neighbouring characters of
///     the current word in turn and records every variant that
///     <c>TestWord</c> accepts
/// </summary>
/// <param name="tempSuggestion">The list accepted variants are appended to</param>
private void SwapChar(ref ArrayList tempSuggestion)
{
    int left = 0;
    while (left < this.CurrentWord.Length - 1)
    {
        // work on a fresh copy so each variant differs by exactly one swap
        char[] letters = this.CurrentWord.ToCharArray();
        char held = letters[left];
        letters[left] = letters[left + 1];
        letters[left + 1] = held;

        string candidate = new string(letters);
        if (this.TestWord(candidate))
        {
            Word suggestion = new Word();
            suggestion.Text = candidate.ToLower();
            // distance is measured against the variant before lower-casing
            suggestion.EditDistance = this.EditDistance(this.CurrentWord, candidate);
            tempSuggestion.Add(suggestion);
        }

        left++;
    }
}
/// <summary>
///     Near-miss generator: cuts the current word in two at each position
///     from 1 up to (but not including) <c>Length - 1</c> and, when
///     <c>TestWord</c> accepts both halves, records them joined by a single space
/// </summary>
/// <param name="tempSuggestion">The list accepted pairs are appended to</param>
private void TwoWords(ref ArrayList tempSuggestion)
{
    for (int cut = 1; cut < this.CurrentWord.Length - 1; cut++)
    {
        string head = this.CurrentWord.Substring(0, cut);
        string tail = this.CurrentWord.Substring(cut);

        // the tail is only tested when the head already passed
        if (!this.TestWord(head) || !this.TestWord(tail))
        {
            continue;
        }

        string joined = head + " " + tail;

        Word suggestion = new Word();
        suggestion.Text = joined.ToLower();
        // distance is measured against the pair before lower-casing
        suggestion.EditDistance = this.EditDistance(this.CurrentWord, joined);
        tempSuggestion.Add(suggestion);
    }
}
/// <summary>
///     suggestions for a typical fault of spelling, that
///     differs with more, than 1 letter from the right form.
/// </summary>
/// <remarks>
///     Each entry of the dictionary's replace table is read as
///     "key replacement", split at the first space. Every occurrence of the
///     key in the current word is replaced in turn, and each variant that
///     <c>TestWord</c> accepts is recorded. Entries without a space, or with
///     an empty key, are skipped.
/// </remarks>
/// <param name="tempSuggestion">The list accepted variants are appended to</param>
private void ReplaceChars(ref ArrayList tempSuggestion)
{
    ArrayList replacementChars = this.Dictionary.ReplaceCharacters;
    for (int i = 0; i < replacementChars.Count; i++)
    {
        string entry = (string)replacementChars[i];

        // A malformed entry (no separator, or separator first) would make
        // Substring / IndexOf throw below; skip it instead of aborting
        // suggestion generation.
        int split = entry.IndexOf(' ');
        if (split <= 0)
        {
            continue;
        }

        string key = entry.Substring(0, split);
        string replacement = entry.Substring(split + 1);

        // Ordinal search: a culture-sensitive match can cover a span whose
        // length differs from key.Length, which would corrupt the splice.
        int pos = this.CurrentWord.IndexOf(key, System.StringComparison.Ordinal);
        while (pos > -1)
        {
            string tempWord = this.CurrentWord.Substring(0, pos)
                + replacement
                + this.CurrentWord.Substring(pos + key.Length);

            if (this.TestWord(tempWord))
            {
                Word ws = new Word();
                ws.Text = tempWord.ToLower();
                // distance is measured against the variant before lower-casing
                ws.EditDistance = this.EditDistance(this.CurrentWord, tempWord);
                tempSuggestion.Add(ws);
            }

            // pos + 1 <= Length here because the key is non-empty
            pos = this.CurrentWord.IndexOf(key, pos + 1, System.StringComparison.Ordinal);
        }
    }
}
/// <summary>
///     Near-miss generator: inserts each of the dictionary's try characters
///     at every position of the current word (including after the last
///     letter) and records every variant that <c>TestWord</c> accepts
/// </summary>
/// <param name="tempSuggestion">The list accepted variants are appended to</param>
private void ForgotChar(ref ArrayList tempSuggestion)
{
    char[] tryChars = this.Dictionary.TryCharacters.ToCharArray();

    int insertAt = 0;
    while (insertAt <= this.CurrentWord.Length)
    {
        foreach (char extra in tryChars)
        {
            string candidate = this.CurrentWord.Insert(insertAt, extra.ToString());
            if (!this.TestWord(candidate))
            {
                continue;
            }

            Word suggestion = new Word();
            suggestion.Text = candidate.ToLower();
            // distance is measured against the variant before lower-casing
            suggestion.EditDistance = this.EditDistance(this.CurrentWord, candidate);
            tempSuggestion.Add(suggestion);
        }

        insertAt++;
    }
}
/// <summary>
///     Near-miss generator: drops each character of the current word in turn
///     and records every variant that <c>TestWord</c> accepts. Does nothing
///     when the current word has fewer than two characters.
/// </summary>
/// <param name="tempSuggestion">The list accepted variants are appended to</param>
private void ExtraChar(ref ArrayList tempSuggestion)
{
    if (this.CurrentWord.Length <= 1)
    {
        return;
    }

    int dropAt = 0;
    while (dropAt < this.CurrentWord.Length)
    {
        string candidate = this.CurrentWord.Remove(dropAt, 1);
        if (this.TestWord(candidate))
        {
            Word suggestion = new Word();
            suggestion.Text = candidate.ToLower(CultureInfo.CurrentUICulture);
            // distance is measured against the variant before lower-casing
            suggestion.EditDistance = this.EditDistance(this.CurrentWord, candidate);
            tempSuggestion.Add(suggestion);
        }

        dropAt++;
    }
}
/// <summary>
///     Initializes the dictionary by loading and parsing the
///     dictionary file and the user file.
/// </summary>
/// <remarks>
///     The file is read line by line as UTF-8. Any non-empty line that starts
///     with <c>[</c> and ends with <c>]</c> switches the current section;
///     every other non-empty line is parsed according to the section it
///     appears in (<c>[Copyright]</c>, <c>[Try]</c>, <c>[Replace]</c>,
///     <c>[Prefix]</c>, <c>[Suffix]</c>, <c>[Phonetic]</c>, <c>[Words]</c>).
///     Lines inside any other section are ignored.
/// </remarks>
public void Initialize()
{
    // clean up data first
    BaseWords.Clear();
    ReplaceCharacters.Clear();
    PrefixRules.Clear();
    SuffixRules.Clear();
    PhoneticRules.Clear();
    TryCharacters = "";

    // the following is used to split a line by white space
    Regex _spaceRegx = new Regex(@"[^\s]+", RegexOptions.Compiled);
    MatchCollection partMatches;

    string currentSection = "";
    AffixRule currentRule = null;
    string dictionaryPath = Path.Combine(DictionaryFolder, _dictionaryFile);

    TraceWriter.TraceInfo("Loading Dictionary:{0}", dictionaryPath);

    // open dictionary file
    FileStream fs = null;
    try
    {
        fs = new FileStream(dictionaryPath, FileMode.Open, FileAccess.Read, FileShare.Read);

        using (var sr = new StreamReader(fs, Encoding.UTF8))
        {
            // the reader now owns the stream; clearing the local keeps the
            // finally block from disposing it a second time
            fs = null;

            // read line by line
            while (sr.Peek() >= 0)
            {
                string tempLine = sr.ReadLine().Trim();
                if (tempLine.Length <= 0)
                {
                    continue;
                }

                // check for section flag
                if (tempLine.StartsWith("[") && tempLine.EndsWith("]"))
                {
                    // set current section that is being parsed
                    currentSection = tempLine;
                    continue;
                }

                // parse line and place in correct object
                switch (currentSection)
                {
                    case "[Copyright]":
                        Copyright += tempLine + "\r\n";
                        break;
                    case "[Try]": // ISpell try chars
                        TryCharacters += tempLine;
                        break;
                    case "[Replace]": // ISpell replace chars
                        ReplaceCharacters.Add(tempLine);
                        break;
                    case "[Prefix]": // MySpell prefix rules
                    case "[Suffix]": // MySpell suffix rules

                        // split line by white space
                        partMatches = _spaceRegx.Matches(tempLine);

                        // if 3 parts, then new rule
                        if (partMatches.Count == 3)
                        {
                            currentRule = new AffixRule();

                            // part 1 = affix key
                            currentRule.Name = partMatches[0].Value;
                            // part 2 = combine flag
                            if (partMatches[1].Value == "Y")
                            {
                                currentRule.AllowCombine = true;
                            }
                            // part 3 = entry count, not used

                            if (currentSection == "[Prefix]")
                            {
                                // add to prefix collection
                                PrefixRules.Add(currentRule.Name, currentRule);
                            }
                            else
                            {
                                // add to suffix collection
                                SuffixRules.Add(currentRule.Name, currentRule);
                            }
                        }
                        // if 4 parts, then entry for current rule
                        else if (partMatches.Count == 4)
                        {
                            // part 1 = affix key; an entry line that shows up
                            // before any rule header has no rule to attach to
                            // and is skipped instead of dereferencing null
                            if (currentRule != null && currentRule.Name == partMatches[0].Value)
                            {
                                AffixEntry entry = new AffixEntry();

                                // part 2 = strip char
                                if (partMatches[1].Value != "0")
                                {
                                    entry.StripCharacters = partMatches[1].Value;
                                }
                                // part 3 = add chars
                                entry.AddCharacters = partMatches[2].Value;
                                // part 4 = conditions
                                AffixUtility.EncodeConditions(partMatches[3].Value, entry);

                                currentRule.AffixEntries.Add(entry);
                            }
                        }
                        break;
                    case "[Phonetic]": // ASpell phonetic rules
                        // split line by white space
                        partMatches = _spaceRegx.Matches(tempLine);
                        if (partMatches.Count >= 2)
                        {
                            PhoneticRule rule = new PhoneticRule();
                            PhoneticUtility.EncodeRule(partMatches[0].Value, ref rule);
                            rule.ReplaceString = partMatches[1].Value;
                            PhoneticRules.Add(rule);
                        }
                        break;
                    case "[Words]": // dictionary word list
                        // splits word into its parts
                        string[] parts = tempLine.Split('/');
                        Word tempWord = new Word();
                        // part 1 = base word
                        tempWord.Text = parts[0];
                        // part 2 = affix keys
                        if (parts.Length >= 2)
                        {
                            tempWord.AffixKeys = parts[1];
                        }
                        // part 3 = phonetic code
                        if (parts.Length >= 3)
                        {
                            tempWord.PhoneticCode = parts[2];
                        }

                        BaseWords.Add(tempWord.Text, tempWord);
                        break;
                } // currentSection switch
            } // read line
        } // reader (and the stream it owns) disposed here
    }
    finally
    {
        // only non-null when the StreamReader constructor itself threw
        fs?.Dispose();
    }

    TraceWriter.TraceInfo("Dictionary Loaded BaseWords:{0}; PrefixRules:{1}; SuffixRules:{2}; PhoneticRules:{3}",
        BaseWords.Count, PrefixRules.Count, SuffixRules.Count, PhoneticRules.Count);

    LoadUserFile();

    Initialized = true;
}
/// <summary>
///     Click handler: loads the dictionary file named by <c>FileName</c>,
///     expands the word entry selected by <c>numUpDownWord</c> through its
///     affix keys and lists the resulting words in <c>listAffixWords</c>
/// </summary>
/// <param name="sender">The control that raised the event (unused)</param>
/// <param name="e">Event data (unused)</param>
private void btnLookup_Click(object sender, System.EventArgs e)
{
    // nothing to look up in an empty dictionary
    if (_Words.Count == 0)
    {
        MessageBox.Show(this, "Dictionary contains no words!", "No Words", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    // the lookup reads the dictionary from disk, so pending edits must be
    // saved first; declining the save cancels the lookup
    if (this.Changed)
    {
        if (MessageBox.Show(this, "Dictionary should be saved before phonetic cache is added. \r\n \r\n Save Dictonary Now?",
            "Save Dictonary", MessageBoxButtons.YesNo, MessageBoxIcon.Question) == DialogResult.Yes)
        {
            this.SaveDictionary();
        }
        else
        {
            return;
        }
    }

    this.Cursor = Cursors.WaitCursor;
    try
    {
        WordDictionary dict = new WordDictionary();
        dict.DictionaryFile = this.FileName;
        dict.Initialize();

        // entries are stored as "text[/affixKeys[/phoneticCode]]"
        string[] parts = _Words[(int)numUpDownWord.Value].ToString().Split('/');

        Word word = new Word();
        word.Text = parts[0];
        if (parts.Length > 1)
        {
            word.AffixKeys = parts[1];
        }
        if (parts.Length > 2)
        {
            word.PhoneticCode = parts[2];
        }

        ArrayList words = dict.ExpandWord(word);

        this.listAffixWords.Items.Clear();
        foreach (string tempWord in words)
        {
            this.listAffixWords.Items.Add(tempWord);
        }
    }
    finally
    {
        // restore the cursor even when loading or expanding throws, so the
        // form is not left showing the wait cursor
        this.Cursor = Cursors.Default;
    }
}