/// <summary>
///     Produces every word form represented by an affix compressed base word.
/// </summary>
/// <param name="word">
///     The base word to expand; its <c>Text</c> and <c>AffixKeys</c> are read.
/// </param>
/// <returns>
///     A System.Collections.ArrayList of strings: forms created by
///     non-combinable suffix rules, then prefixed forms, then the base word
///     together with the forms created by combinable suffix rules.
/// </returns>
public ArrayList ExpandWord(Word word)
{
    string baseText = word.Text;

    // Forms that may still receive a prefix: the base word itself plus every
    // suffixed form whose rule allows combining.
    ArrayList combinable = new ArrayList();
    combinable.Add(baseText);

    ArrayList expanded = new ArrayList();
    ArrayList prefixKeyNames = new ArrayList();

    // Pass 1: apply suffix rules and remember prefix keys for pass 2.
    // A key that names a suffix rule is never treated as a prefix key.
    foreach (char affixKey in word.AffixKeys)
    {
        string keyName = affixKey.ToString(CultureInfo.CurrentUICulture);

        if (!_suffixRules.ContainsKey(keyName))
        {
            if (_prefixRules.ContainsKey(keyName))
            {
                prefixKeyNames.Add(keyName);
            }

            continue;
        }

        AffixRule suffixRule = _suffixRules[keyName];
        string suffixed = AffixUtility.AddSuffix(baseText, suffixRule);
        if (suffixed == baseText)
        {
            // the rule did not change the word
            continue;
        }

        ArrayList target = suffixRule.AllowCombine ? combinable : expanded;
        target.Add(suffixed);
    }

    // Pass 2: apply every remembered prefix rule to every combinable form.
    foreach (string keyName in prefixKeyNames)
    {
        AffixRule prefixRule = _prefixRules[keyName];

        for (int i = 0; i < combinable.Count; i++)
        {
            string candidate = (string)combinable[i];
            string prefixed = AffixUtility.AddPrefix(candidate, prefixRule);
            if (prefixed != candidate)
            {
                expanded.Add(prefixed);
            }
        }
    }

    expanded.AddRange(combinable);
    return expanded;
}
/// <summary>
///     Produces every word form represented by an affix compressed base word.
/// </summary>
/// <param name="word">
///     The base word to expand; its <c>Text</c> and <c>AffixKeys</c> are read.
/// </param>
/// <returns>
///     A list of strings: forms created by non-combinable suffix rules, then
///     prefixed forms, then the base word together with the forms created by
///     combinable suffix rules.
/// </returns>
public List<string> ExpandWord(Word word)
{
    string baseText = word.Text;

    // Forms that may still receive a prefix: the base word itself plus every
    // suffixed form whose rule allows combining.
    List<string> combinable = new List<string>();
    combinable.Add(baseText);

    List<string> expanded = new List<string>();
    List<string> prefixKeyNames = new List<string>();

    // Pass 1: apply suffix rules and remember prefix keys for pass 2.
    // A key that names a suffix rule is never treated as a prefix key.
    foreach (char affixKey in word.AffixKeys)
    {
        string keyName = affixKey.ToString(CultureInfo.CurrentUICulture);

        if (_suffixRules.ContainsKey(keyName))
        {
            AffixRule suffixRule = _suffixRules[keyName];
            string suffixed = AffixUtility.AddSuffix(baseText, suffixRule);

            if (suffixed != baseText)
            {
                List<string> target = suffixRule.AllowCombine ? combinable : expanded;
                target.Add(suffixed);
            }

            continue;
        }

        if (_prefixRules.ContainsKey(keyName))
        {
            prefixKeyNames.Add(keyName);
        }
    }

    // Pass 2: apply every remembered prefix rule to every combinable form.
    foreach (string keyName in prefixKeyNames)
    {
        AffixRule prefixRule = _prefixRules[keyName];

        for (int i = 0; i < combinable.Count; i++)
        {
            string candidate = combinable[i];
            string prefixed = AffixUtility.AddPrefix(candidate, prefixRule);
            if (prefixed == candidate)
            {
                // the rule did not change the word
                continue;
            }

            expanded.Add(prefixed);
        }
    }

    expanded.AddRange(combinable);

    TraceWriter.TraceVerbose("Word Expanded: {0}; Child Words: {1}", baseText, expanded.Count);
    return expanded;
}
/// <summary>
///     Initializes the dictionary by loading and parsing the
///     dictionary file and the user file.
/// </summary>
/// <remarks>
///     The dictionary file is read line by line. A line that is exactly one of
///     <c>[Copyright]</c>, <c>[Try]</c>, <c>[Replace]</c>, <c>[Prefix]</c>,
///     <c>[Suffix]</c>, <c>[Phonetic]</c> or <c>[Words]</c> switches the current
///     section; every other non-empty line is parsed according to the section
///     it appears in. Lines that appear before any section header are ignored.
/// </remarks>
/// <exception cref="FileNotFoundException">
///     The resolved dictionary path does not exist.
/// </exception>
public void Initialize()
{
    // clean up data first
    _baseWords.Clear();
    _replaceCharacters.Clear();
    _prefixRules.Clear();
    _suffixRules.Clear();
    _phoneticRules.Clear();
    _tryCharacters = "";
    // NOTE(review): Copyright is appended to below but is not reset here, so a
    // second Initialize() call presumably duplicates it - confirm whether intended.

    // A file name starting with "." is replaced by the current thread culture name.
    if (_dictionaryFile.StartsWith(".", System.StringComparison.Ordinal))
    {
        _dictionaryFile = Thread.CurrentThread.CurrentCulture.Name;
    }

    // add the default file extension
    if (!_dictionaryFile.EndsWith(".dic", System.StringComparison.Ordinal))
    {
        _dictionaryFile += ".dic";
    }

    // used to split a line into its white space separated parts
    Regex spaceRegex = new Regex(@"[^\s]+", RegexOptions.Compiled);
    string currentSection = "";
    AffixRule currentRule = null;

    // Fall back to a "Dictionary" folder next to the executing assembly.
    if (_dictionaryFolder == null)
    {
        _dictionaryFolder = Path.Combine(
            Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location),
            "Dictionary");
    }

    string dictionaryPath = Path.Combine(_dictionaryFolder, _dictionaryFile);

    // Build the message explicitly: Debug.WriteLine(string, string) is the
    // (message, category) overload and would not substitute "{0}".
    System.Diagnostics.Debug.WriteLine("Loading Dictionary:" + dictionaryPath);

    if (!File.Exists(dictionaryPath))
    {
        throw new FileNotFoundException("Cannot find dictonary with location " + dictionaryPath);
    }

    // The using blocks release the file handle even when parsing throws.
    using (FileStream fs = new FileStream(dictionaryPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (StreamReader sr = new StreamReader(fs, true))
    {
        string rawLine;
        while ((rawLine = sr.ReadLine()) != null)
        {
            string tempLine = rawLine.Trim();
            if (tempLine.Length == 0)
            {
                continue;
            }

            // check for section flag
            switch (tempLine)
            {
                case "[Copyright]":
                case "[Try]":
                case "[Replace]":
                case "[Prefix]":
                case "[Suffix]":
                case "[Phonetic]":
                case "[Words]":
                    // set current section that is being parsed
                    currentSection = tempLine;
                    continue;
            }

            // parse line and place in correct object
            switch (currentSection)
            {
                case "[Copyright]":
                    this.Copyright += tempLine + "\r\n";
                    break;

                case "[Try]": // ISpell try chars
                    this.TryCharacters += tempLine;
                    break;

                case "[Replace]": // ISpell replace chars
                    this.ReplaceCharacters.Add(tempLine);
                    break;

                case "[Prefix]": // MySpell prefix rules
                case "[Suffix]": // MySpell suffix rules
                {
                    MatchCollection partMatches = spaceRegex.Matches(tempLine);

                    if (partMatches.Count == 3)
                    {
                        // 3 parts = new rule: key, combine flag, entry count (not used)
                        currentRule = new AffixRule();
                        currentRule.Name = partMatches[0].Value;
                        if (partMatches[1].Value == "Y")
                        {
                            currentRule.AllowCombine = true;
                        }

                        if (currentSection == "[Prefix]")
                        {
                            this.PrefixRules.Add(currentRule.Name, currentRule);
                        }
                        else
                        {
                            this.SuffixRules.Add(currentRule.Name, currentRule);
                        }
                    }
                    else if (partMatches.Count == 4
                        && currentRule != null
                        && currentRule.Name == partMatches[0].Value)
                    {
                        // 4 parts = entry for the current rule: key, strip, add, conditions.
                        // An entry seen before any rule header (currentRule == null) is
                        // skipped, like an entry whose key does not match the current rule.
                        AffixEntry entry = new AffixEntry();

                        // "0" means "nothing to strip" / "nothing to add"
                        if (partMatches[1].Value != "0")
                        {
                            entry.StripCharacters = partMatches[1].Value;
                        }

                        if (partMatches[2].Value != "0")
                        {
                            entry.AddCharacters = partMatches[2].Value;
                        }

                        AffixUtility.EncodeConditions(partMatches[3].Value, entry);
                        currentRule.AffixEntries.Add(entry);
                    }

                    break;
                }

                case "[Phonetic]": // ASpell phonetic rules
                {
                    MatchCollection partMatches = spaceRegex.Matches(tempLine);
                    if (partMatches.Count >= 2)
                    {
                        PhoneticRule rule = new PhoneticRule();
                        PhoneticUtility.EncodeRule(partMatches[0].Value, ref rule);
                        rule.ReplaceString = partMatches[1].Value;
                        _phoneticRules.Add(rule);
                    }

                    break;
                }

                case "[Words]": // dictionary word list
                {
                    // word line format: base word[/affix keys[/phonetic code]]
                    string[] parts = tempLine.Split('/');
                    Word tempWord = new Word();
                    tempWord.Text = parts[0];

                    if (parts.Length >= 2)
                    {
                        tempWord.AffixKeys = parts[1];
                    }

                    if (parts.Length >= 3)
                    {
                        tempWord.PhoneticCode = parts[2];
                    }

                    this.BaseWords.Add(tempWord.Text, tempWord);
                    break;
                }
            }
        }
    }

    this.LoadUserFile();
    _initialized = true;
}
/// <summary>
///     Searches all contained word lists for word
/// </summary>
/// <remarks>
///     The word is looked up in the user words, then in the base words, then
///     with each suffix entry removed, and finally with each prefix entry
///     removed from the word and from the forms produced by combinable suffix
///     rules. A stripped form only counts as a match when it is a base word
///     and <c>VerifyAffixKey</c> accepts the rule key for it.
///     As a side effect <c>_possibleBaseWords</c> is cleared on entry and
///     filled with the stripped forms that were tried.
/// </remarks>
/// <param name="word">
///     The word to search for
/// </param>
/// <returns>
///     Returns true if word is found
/// </returns>
public bool Contains(string word)
{
    // clean up possible base word list
    _possibleBaseWords.Clear();

    // Debug messages are concatenated explicitly: Debug.WriteLine(string, string)
    // is the (message, category) overload and would not substitute "{0}".

    // Step 1 Search UserWords
    if (_userWords.ContainsKey(word))
    {
        System.Diagnostics.Debug.WriteLine("Word Found in User Dictionary: " + word);
        return true;
    }

    // Step 2 Search BaseWords
    if (_baseWords.ContainsKey(word))
    {
        System.Diagnostics.Debug.WriteLine("Word Found in Base Words: " + word);
        return true;
    }

    // Step 3 Remove suffix, Search BaseWords
    // The word itself and every form left by a combinable suffix rule are
    // kept so that step 4 can also try removing a prefix from them.
    List<string> suffixWords = new List<string>();
    suffixWords.Add(word);

    foreach (AffixRule rule in SuffixRules.Values)
    {
        foreach (AffixEntry entry in rule.AffixEntries)
        {
            string tempWord = AffixUtility.RemoveSuffix(word, entry);
            if (tempWord == word)
            {
                // entry did not apply
                continue;
            }

            if (_baseWords.ContainsKey(tempWord) && this.VerifyAffixKey(tempWord, rule.Name[0]))
            {
                // second argument is the debug category, not a format argument
                System.Diagnostics.Debug.WriteLine(tempWord + " " + rule.Name[0], "Word Found With Base Words: Suffix Key: ");
                return true;
            }

            if (rule.AllowCombine)
            {
                // saving word to check if it is a word after prefix is removed
                suffixWords.Add(tempWord);
            }
            else
            {
                // saving possible base words for use in generating suggestions
                _possibleBaseWords.Add(tempWord);
            }
        }
    }

    // saving possible base words for use in generating suggestions
    _possibleBaseWords.AddRange(suffixWords);

    // Step 4 Remove Prefix, Search BaseWords
    foreach (AffixRule rule in PrefixRules.Values)
    {
        foreach (AffixEntry entry in rule.AffixEntries)
        {
            foreach (string suffixWord in suffixWords)
            {
                string tempWord = AffixUtility.RemovePrefix(suffixWord, entry);
                if (tempWord == suffixWord)
                {
                    // entry did not apply
                    continue;
                }

                if (_baseWords.ContainsKey(tempWord) && this.VerifyAffixKey(tempWord, rule.Name[0]))
                {
                    return true;
                }

                // saving possible base words for use in generating suggestions
                _possibleBaseWords.Add(tempWord);
            }
        }
    }

    // word not found
    return false;
}
/// <summary>
///     Initializes the dictionary by loading and parsing the
///     dictionary file and the user file.
/// </summary>
/// <remarks>
///     The dictionary file is read as UTF-8, line by line. Any non-empty line
///     that starts with '[' and ends with ']' switches the current section;
///     every other non-empty line is parsed according to the section it
///     appears in. Lines in a section this method does not know are ignored.
/// </remarks>
public void Initialize()
{
    // clean up data first
    _baseWords.Clear();
    _replaceCharacters.Clear();
    _prefixRules.Clear();
    _suffixRules.Clear();
    _phoneticRules.Clear();
    _tryCharacters = "";

    // used to split a line into its white space separated parts
    Regex spaceRegex = new Regex(@"[^\s]+", RegexOptions.Compiled);
    string currentSection = "";
    AffixRule currentRule = null;

    string dictionaryPath = Path.Combine(_dictionaryFolder, _dictionaryFile);
    TraceWriter.TraceInfo("Loading Dictionary:{0}", dictionaryPath);

    // open dictionary file
    FileStream fs = null;
    try
    {
        fs = new FileStream(dictionaryPath, FileMode.Open, FileAccess.Read, FileShare.Read);
        using (var sr = new StreamReader(fs, Encoding.UTF8))
        {
            // The reader is now responsible for the stream; clearing the local
            // keeps the finally block from disposing it a second time.
            fs = null;

            string rawLine;
            while ((rawLine = sr.ReadLine()) != null)
            {
                string tempLine = rawLine.Trim();
                if (tempLine.Length == 0)
                {
                    continue;
                }

                // check for section flag (ordinal check on first and last character)
                if (tempLine[0] == '[' && tempLine[tempLine.Length - 1] == ']')
                {
                    // set current section that is being parsed
                    currentSection = tempLine;
                    continue;
                }

                // parse line and place in correct object
                switch (currentSection)
                {
                    case "[Copyright]":
                        Copyright += tempLine + "\r\n";
                        break;

                    case "[Try]": // ISpell try chars
                        TryCharacters += tempLine;
                        break;

                    case "[Replace]": // ISpell replace chars
                        ReplaceCharacters.Add(tempLine);
                        break;

                    case "[Prefix]": // MySpell prefix rules
                    case "[Suffix]": // MySpell suffix rules
                    {
                        MatchCollection partMatches = spaceRegex.Matches(tempLine);

                        if (partMatches.Count == 3)
                        {
                            // 3 parts = new rule: key, combine flag, entry count (not used)
                            currentRule = new AffixRule();
                            currentRule.Name = partMatches[0].Value;
                            if (partMatches[1].Value == "Y")
                            {
                                currentRule.AllowCombine = true;
                            }

                            if (currentSection == "[Prefix]")
                            {
                                PrefixRules.Add(currentRule.Name, currentRule);
                            }
                            else
                            {
                                SuffixRules.Add(currentRule.Name, currentRule);
                            }
                        }
                        else if (partMatches.Count == 4
                            && currentRule != null
                            && currentRule.Name == partMatches[0].Value)
                        {
                            // 4 parts = entry for the current rule: key, strip, add, conditions.
                            // An entry seen before any rule header (currentRule == null) is
                            // skipped, like an entry whose key does not match the current rule.
                            AffixEntry entry = new AffixEntry();

                            // "0" means "nothing to strip"
                            if (partMatches[1].Value != "0")
                            {
                                entry.StripCharacters = partMatches[1].Value;
                            }

                            // add chars are stored as written, including a literal "0"
                            entry.AddCharacters = partMatches[2].Value;

                            AffixUtility.EncodeConditions(partMatches[3].Value, entry);
                            currentRule.AffixEntries.Add(entry);
                        }

                        break;
                    }

                    case "[Phonetic]": // ASpell phonetic rules
                    {
                        MatchCollection partMatches = spaceRegex.Matches(tempLine);
                        if (partMatches.Count >= 2)
                        {
                            PhoneticRule rule = new PhoneticRule();
                            PhoneticUtility.EncodeRule(partMatches[0].Value, ref rule);
                            rule.ReplaceString = partMatches[1].Value;
                            _phoneticRules.Add(rule);
                        }

                        break;
                    }

                    case "[Words]": // dictionary word list
                    {
                        // word line format: base word[/affix keys[/phonetic code]]
                        string[] parts = tempLine.Split('/');
                        Word tempWord = new Word();
                        tempWord.Text = parts[0];

                        if (parts.Length >= 2)
                        {
                            tempWord.AffixKeys = parts[1];
                        }

                        if (parts.Length >= 3)
                        {
                            tempWord.PhoneticCode = parts[2];
                        }

                        BaseWords.Add(tempWord.Text, tempWord);
                        break;
                    }
                }
            }
        }
    }
    finally
    {
        // only reached with a live stream when the reader could not be created
        if (fs != null)
        {
            fs.Dispose();
        }
    }

    TraceWriter.TraceInfo("Dictionary Loaded BaseWords:{0}; PrefixRules:{1}; SuffixRules:{2}; PhoneticRules:{3}",
        BaseWords.Count, PrefixRules.Count, SuffixRules.Count, PhoneticRules.Count);

    LoadUserFile();
    _initialized = true;
}
/// <summary>
///     Creates a dictionary by parsing dictionary text from the supplied reader.
/// </summary>
/// <remarks>
///     The first 128 entries of the encode and decode tables are set to the
///     identity mapping, then the reader is consumed line by line until it
///     returns no more lines. Each line is trimmed and passed through
///     <c>encode</c> before it is interpreted. A line that is exactly one of
///     <c>[Copyright]</c>, <c>[Try]</c>, <c>[Replace]</c>, <c>[Prefix]</c>,
///     <c>[Suffix]</c>, <c>[Phonetic]</c> or <c>[Words]</c> switches the current
///     section; every other non-empty line is parsed according to its section.
///     Copyright lines are read but not stored. The reader is not closed.
/// </remarks>
/// <param name="inputDictionary">
///     The reader that supplies the dictionary text.
/// </param>
/// <exception cref="System.ArgumentNullException">
///     <paramref name="inputDictionary"/> is null.
/// </exception>
public WordDictionary(TextReader inputDictionary)
{
    if (inputDictionary == null)
    {
        throw new System.ArgumentNullException("inputDictionary");
    }

    for (byte i = 0; i < 128; i++)
    {
        _encodeTable[i] = i;
        _decodeTable[i] = (char)i;
    }

    string currentSection = "";
    AffixRule currentRule = null;

    // Loop on ReadLine() rather than Peek(): Peek() returns -1 for readers that
    // cannot peek, which would end the loop before any line is read.
    string rawLine;
    while ((rawLine = inputDictionary.ReadLine()) != null)
    {
        string tempLine = encode(rawLine.Trim());
        if (tempLine.Length == 0)
        {
            continue;
        }

        // check for section flag
        switch (tempLine)
        {
            case "[Copyright]":
            case "[Try]":
            case "[Replace]":
            case "[Prefix]":
            case "[Suffix]":
            case "[Phonetic]":
            case "[Words]":
                // set current section that is being parsed
                currentSection = tempLine;
                continue;
        }

        // parse line and place in correct object
        switch (currentSection)
        {
            case "[Copyright]":
                // copyright text is not kept
                break;

            case "[Try]": // ISpell try chars
                _tryCharacters += tempLine;
                break;

            case "[Replace]": // ISpell replace chars
                _replaceCharacters.Add(tempLine);
                break;

            case "[Prefix]": // MySpell prefix rules
            case "[Suffix]": // MySpell suffix rules
            {
                MatchCollection partMatches = _spaceRegx.Matches(tempLine);

                if (partMatches.Count == 3)
                {
                    // 3 parts = new rule: key, combine flag, entry count (not used)
                    currentRule = new AffixRule();
                    currentRule.Name = partMatches[0].Value;
                    if (partMatches[1].Value == "Y")
                    {
                        currentRule.AllowCombine = true;
                    }

                    if (currentSection == "[Prefix]")
                    {
                        _prefixRules.Add(currentRule.Name, currentRule);
                    }
                    else
                    {
                        _suffixRules.Add(currentRule.Name, currentRule);
                    }
                }
                else if (partMatches.Count == 4
                    && currentRule != null
                    && currentRule.Name == partMatches[0].Value)
                {
                    // 4 parts = entry for the current rule: key, strip, add, conditions.
                    // An entry seen before any rule header (currentRule == null) is
                    // skipped, like an entry whose key does not match the current rule.
                    AffixEntry entry = new AffixEntry();

                    // "0" means "nothing to strip"
                    if (partMatches[1].Value != "0")
                    {
                        entry.StripCharacters = partMatches[1].Value;
                    }

                    // add chars are stored as written, including a literal "0"
                    entry.AddCharacters = partMatches[2].Value;

                    AffixUtility.EncodeConditions(partMatches[3].Value, entry);
                    currentRule.AffixEntries.Add(entry);
                }

                break;
            }

            case "[Phonetic]": // ASpell phonetic rules
            {
                MatchCollection partMatches = _spaceRegx.Matches(tempLine);
                if (partMatches.Count >= 2)
                {
                    PhoneticRule rule = new PhoneticRule();
                    PhoneticUtility.EncodeRule(partMatches[0].Value, ref rule);
                    rule.ReplaceString = partMatches[1].Value;
                    _phoneticRules.Add(rule);
                }

                break;
            }

            case "[Words]": // dictionary word list
            {
                // word line format: base word[/affix keys[/phonetic code]]
                string[] parts = tempLine.Split('/');
                Word tempWord = new Word();
                tempWord.Text = parts[0];

                if (parts.Length >= 2)
                {
                    tempWord.AffixKeys = parts[1];
                }

                if (parts.Length >= 3)
                {
                    tempWord.PhoneticCode = parts[2];
                }

                // indexer assignment: a repeated base word replaces the earlier entry
                _baseWords[tempWord.Text] = tempWord;
                break;
            }
        }
    }
}
/// <summary>
///     Looks the word up in the base word list, also trying the forms left
///     after removing suffix and prefix entries.
/// </summary>
/// <param name="word">
///     The word to search for; it is passed through <c>encode</c> first.
/// </param>
/// <returns>
///     A result constructed with <c>true</c> and <c>null</c> when the word is
///     found, otherwise one constructed with <c>false</c> and an array of the
///     stripped forms that were tried.
/// </returns>
public ContainsResult Contains(string word)
{
    string encoded = encode(word);

    // stripped forms collected along the way, returned when nothing matches
    List<string> candidates = new List<string>();

    // Direct hit in the base words.
    if (_baseWords.ContainsKey(encoded))
    {
        return new ContainsResult(true, null);
    }

    // Suffix removal. The word itself and every form left by a combinable
    // suffix rule are kept so a prefix can be removed from them afterwards.
    List<string> prefixInputs = new List<string>();
    prefixInputs.Add(encoded);

    foreach (AffixRule suffixRule in _suffixRules.Values)
    {
        foreach (AffixEntry suffixEntry in suffixRule.AffixEntries.ToArray())
        {
            string stripped = AffixUtility.RemoveSuffix(encoded, suffixEntry);
            if (stripped == null)
            {
                // entry did not apply
                continue;
            }

            if (_baseWords.ContainsKey(stripped) && verifyAffixKey(stripped, suffixRule.Name[0]))
            {
                return new ContainsResult(true, null);
            }

            List<string> bucket = suffixRule.AllowCombine ? prefixInputs : candidates;
            bucket.Add(stripped);
        }
    }

    string[] prefixInputArray = prefixInputs.ToArray();
    candidates.AddRange(prefixInputs);

    // Prefix removal from the word and from the combinable suffix-stripped forms.
    foreach (AffixRule prefixRule in _prefixRules.Values)
    {
        foreach (AffixEntry prefixEntry in prefixRule.AffixEntries.ToArray())
        {
            foreach (string input in prefixInputArray)
            {
                string stripped = AffixUtility.RemovePrefix(input, prefixEntry);
                if (stripped == null)
                {
                    // entry did not apply
                    continue;
                }

                if (_baseWords.ContainsKey(stripped) && verifyAffixKey(stripped, prefixRule.Name[0]))
                {
                    return new ContainsResult(true, null);
                }

                candidates.Add(stripped);
            }
        }
    }

    return new ContainsResult(false, candidates.ToArray());
}