/// <summary>
///     Applies the first matching suffix entry of a rule to a word.
/// </summary>
/// <param name="word">The word to get the suffix added to.</param>
/// <param name="rule">The AffixRule whose entries are tried in order.</param>
/// <returns>
///     The word with the entry's strip characters removed from its end and
///     the entry's add characters appended, or the unchanged word when no
///     entry applies.
/// </returns>
public static string AddSuffix(string word, AffixRule rule)
{
    foreach (AffixEntry entry in rule.AffixEntries)
    {
        // The condition window covers the last ConditionCount characters.
        if (word.Length < entry.ConditionCount)
        {
            continue;
        }

        bool conditionsHold = true;
        int windowStart = word.Length - entry.ConditionCount;
        for (int i = 0; i < entry.ConditionCount; i++)
        {
            int charCode = word[windowStart + i];
            int positionBit = 1 << i;

            // Bit i of Condition[charCode] must be set for the character
            // to be accepted at condition position i.
            if ((entry.Condition[charCode] & positionBit) != positionBit)
            {
                conditionsHold = false;
                break;
            }
        }

        if (!conditionsHold)
        {
            continue;
        }

        // An entry whose strip string is longer than the word cannot apply;
        // without this check Substring throws ArgumentOutOfRangeException.
        int keptLength = word.Length - entry.StripCharacters.Length;
        if (keptLength < 0)
        {
            continue;
        }

        return word.Substring(0, keptLength) + entry.AddCharacters;
    }

    return word;
}
/// <summary>
///     Applies the first matching prefix entry of a rule to a word.
/// </summary>
/// <param name="word">The word to get the prefix added to.</param>
/// <param name="rule">The AffixRule whose entries are tried in order.</param>
/// <returns>
///     The word with the entry's strip characters removed from its start and
///     the entry's add characters prepended, or the unchanged word when no
///     entry applies.
/// </returns>
public static string AddPrefix(string word, AffixRule rule)
{
    foreach (AffixEntry entry in rule.AffixEntries)
    {
        // The condition window covers the first ConditionCount characters.
        if (word.Length < entry.ConditionCount)
        {
            continue;
        }

        int passCount = 0;
        for (int i = 0; i < entry.ConditionCount; i++)
        {
            int charCode = (int)word[i];

            // Bit i of Condition[charCode] must be set for the character
            // to be accepted at condition position i.
            if ((entry.Condition[charCode] & (1 << i)) != (1 << i))
            {
                break;
            }

            passCount++;
        }

        if (passCount != entry.ConditionCount)
        {
            continue;
        }

        // An entry whose strip string is longer than the word cannot apply;
        // without this check Substring throws ArgumentOutOfRangeException.
        int stripLength = entry.StripCharacters.Length;
        if (stripLength > word.Length)
        {
            continue;
        }

        return entry.AddCharacters + word.Substring(stripLength);
    }

    return word;
}
/// <summary>
///     Tells whether <paramref name="newRule"/> may be applied to a word on
///     top of <paramref name="appliedRule"/>.
/// </summary>
/// <param name="newRule">The rule about to be applied; may be null.</param>
/// <param name="appliedRule">The rule already applied; may be null.</param>
/// <returns>
///     True when either rule is null, or when the rules are not both suffix
///     rules, not both prefix rules, and both have CanCombine set;
///     otherwise false.
/// </returns>
private bool CanCombineRules(AffixRule newRule, AffixRule appliedRule)
{
    // With nothing applied yet (or nothing to apply) there is no conflict.
    if (newRule == null || appliedRule == null)
    {
        return true;
    }

    // At most one rule of each kind (suffix or prefix) can be used per word.
    bool bothSuffixes = newRule is SuffixRule && appliedRule is SuffixRule;
    bool bothPrefixes = newRule is PrefixRule && appliedRule is PrefixRule;
    if (bothSuffixes || bothPrefixes)
    {
        return false;
    }

    // The remaining pairs can be combined only if both rules allow it.
    return newRule.CanCombine && appliedRule.CanCombine;
}
/// <summary>
///     Adds a suffix to a word using the first entry of the rule that applies.
/// </summary>
/// <param name="word">The word to get the suffix added to.</param>
/// <param name="rule">The AffixRule to use when adding the suffix.</param>
/// <returns>
///     The word with the suffix added, or the word itself when none of the
///     rule's entries applies.
/// </returns>
public static string AddSuffix(string word, AffixRule rule)
{
    foreach (AffixEntry entry in rule.AffixEntries)
    {
        if (!WordEndSatisfiesConditions(word, entry))
        {
            continue;
        }

        // Skip entries that would strip more characters than the word has;
        // Substring would otherwise throw ArgumentOutOfRangeException.
        int keptLength = word.Length - entry.StripCharacters.Length;
        if (keptLength < 0)
        {
            continue;
        }

        return word.Substring(0, keptLength) + entry.AddCharacters;
    }

    return word;
}

/// <summary>
///     Tests the last ConditionCount characters of a word against the
///     entry's condition table.
/// </summary>
private static bool WordEndSatisfiesConditions(string word, AffixEntry entry)
{
    int conditionCount = entry.ConditionCount;
    if (word.Length < conditionCount)
    {
        return false;
    }

    int windowStart = word.Length - conditionCount;
    for (int i = 0; i < conditionCount; i++)
    {
        int charCode = (int)word[windowStart + i];
        int positionBit = 1 << i;

        // Bit i of Condition[charCode] must be set for condition position i.
        if ((entry.Condition[charCode] & positionBit) != positionBit)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
///     Loads affix rules, TRY characters and REP patterns from an affix file,
///     replacing any previously loaded affix data.
/// </summary>
/// <param name="path">Path of the affix file to read.</param>
private void LoadAffixesFile(string path)
{
    // The using block closes the reader even when parsing throws.
    using (StreamReader f = File.OpenText(path))
    {
        char[] sepChars = new char[] { ' ', '\t' };
        AffixRule curAffixRule = null;
        string s;

        affixRules = new AffixRulesCollection();
        affixRulesLookup = new AffixRule['Z' + 1];
        tryCharacters = "";
        replacePatterns = new List<ReplacePattern>();

        while ((s = f.ReadLine()) != null)
        {
            // ignore comment
            if (s.StartsWith("#", StringComparison.Ordinal))
            {
                continue;
            }

            string[] data = s.Split(sepChars, StringSplitOptions.RemoveEmptyEntries);
            if (data.Length == 0)
            {
                continue; // empty line
            }

            if (data[0] == "PFX" || data[0] == "SFX")
            {
                if (data.Length == 4)
                {
                    // A four-field line starts a new rule.
                    curAffixRule = AffixRule.Parse(data);
                    affixRules.Add(curAffixRule);

                    // Rule names above 'Z' would overrun the initial table,
                    // so grow it on demand instead of throwing.
                    int slot = curAffixRule.Name;
                    if (slot >= affixRulesLookup.Length)
                    {
                        AffixRule[] grown = new AffixRule[slot + 1];
                        Array.Copy(affixRulesLookup, grown, affixRulesLookup.Length);
                        affixRulesLookup = grown;
                    }

                    affixRulesLookup[slot] = curAffixRule;
                }
                else if (curAffixRule != null)
                {
                    // Any other PFX/SFX line is an affix of the current rule.
                    curAffixRule.ParseAndAddAffix(data);
                }
            }
            else
            {
                if (data[0] == "TRY")
                {
                    // A bare "TRY" line has no characters to read.
                    if (data.Length > 1)
                    {
                        tryCharacters = data[1];
                    }
                }
                else if (data[0] == "REP" && data.Length == 3)
                {
                    replacePatterns.Add(new ReplacePattern(data[1], data[2]));
                }

                // Any non-affix line ends the current rule.
                curAffixRule = null;
            }
        }
    }
}
/// <summary>
///     Expands an affix compressed base word into the forms produced by its
///     affix keys.
/// </summary>
/// <param name="word">The word to expand.</param>
/// <returns>
///     An ArrayList of strings: the prefixed and non-combinable suffixed
///     forms first, followed by the base word and its combinable suffixed
///     forms.
/// </returns>
public ArrayList ExpandWord(Word word)
{
    ArrayList combinableForms = new ArrayList();
    ArrayList expandedForms = new ArrayList();
    combinableForms.Add(word.Text);
    string pendingPrefixKeys = "";

    // Suffix keys are applied straight away; prefix keys are collected.
    foreach (char key in word.AffixKeys)
    {
        string keyText = key.ToString(CultureInfo.CurrentUICulture);

        if (_suffixRules.ContainsKey(keyText))
        {
            AffixRule suffixRule = _suffixRules[keyText];
            string suffixed = AffixUtility.AddSuffix(word.Text, suffixRule);
            if (suffixed == word.Text)
            {
                continue;
            }

            // Only combinable suffix forms are offered to the prefix pass.
            ArrayList destination = suffixRule.AllowCombine ? combinableForms : expandedForms;
            destination.Add(suffixed);
        }
        else if (_prefixRules.ContainsKey(keyText))
        {
            pendingPrefixKeys += keyText;
        }
    }

    // Apply every collected prefix to the base word and its combinable forms.
    foreach (char key in pendingPrefixKeys)
    {
        AffixRule prefixRule = _prefixRules[key.ToString(CultureInfo.CurrentUICulture)];

        foreach (string candidate in combinableForms)
        {
            string prefixed = AffixUtility.AddPrefix(candidate, prefixRule);
            if (prefixed != candidate)
            {
                expandedForms.Add(prefixed);
            }
        }
    }

    expandedForms.AddRange(combinableForms);
    return expandedForms;
}
/// <summary>
///     Expands an affix compressed base word into the forms produced by its
///     affix keys.
/// </summary>
/// <param name="word">The word to expand.</param>
/// <returns>
///     A list of strings: the prefixed and non-combinable suffixed forms
///     first, followed by the base word and its combinable suffixed forms.
/// </returns>
public List<string> ExpandWord(Word word)
{
    List<string> prefixableForms = new List<string>();
    List<string> result = new List<string>();
    prefixableForms.Add(word.Text);
    string deferredPrefixKeys = "";

    // First pass: apply suffix rules now, remember prefix keys for later.
    foreach (char affixKey in word.AffixKeys)
    {
        string lookupKey = affixKey.ToString(CultureInfo.CurrentUICulture);

        if (!_suffixRules.ContainsKey(lookupKey))
        {
            if (_prefixRules.ContainsKey(lookupKey))
            {
                deferredPrefixKeys += lookupKey;
            }

            continue;
        }

        AffixRule suffixRule = _suffixRules[lookupKey];
        string withSuffix = AffixUtility.AddSuffix(word.Text, suffixRule);
        if (withSuffix == word.Text)
        {
            continue;
        }

        // Combinable suffix forms also go through the prefix pass.
        if (suffixRule.AllowCombine)
        {
            prefixableForms.Add(withSuffix);
        }
        else
        {
            result.Add(withSuffix);
        }
    }

    // Second pass: apply each remembered prefix to every prefixable form.
    foreach (char prefixKey in deferredPrefixKeys)
    {
        AffixRule prefixRule = _prefixRules[prefixKey.ToString(CultureInfo.CurrentUICulture)];

        foreach (string form in prefixableForms)
        {
            string withPrefix = AffixUtility.AddPrefix(form, prefixRule);
            if (withPrefix != form)
            {
                result.Add(withPrefix);
            }
        }
    }

    result.AddRange(prefixableForms);
    TraceWriter.TraceVerbose("Word Expanded: {0}; Child Words: {1}", word.Text, result.Count);
    return result;
}
/// <summary>
///     Checks whether a word is in the dictionary, either directly or as a
///     base word reached by reversing one or two affixes.
/// </summary>
/// <param name="word">The word to check.</param>
/// <returns>
///     True when the word itself is a dictionary key, or when reversing
///     affixes yields a dictionary word that reports it can apply the rules
///     involved; otherwise false.
/// </returns>
private bool CheckWordInternal(string word)
{
    // Exact hit: no affix processing needed.
    if (words.ContainsKey(word))
    {
        return true;
    }

    // Work list of forms still to examine, mapped to the rule that was
    // reversed to obtain them (null for the original word).
    var pendingForms = new Dictionary<string, AffixRule>();
    pendingForms.Add(word, null);

    while (pendingForms.Count > 0)
    {
        string currentForm = pendingForms.Keys.First();
        AffixRule reversedRule = pendingForms[currentForm];

        for (int ruleIndex = 0; ruleIndex < affixRules.Count; ++ruleIndex)
        {
            var candidateRule = affixRules[ruleIndex];
            if (!CanCombineRules(candidateRule, reversedRule))
            {
                continue;
            }

            foreach (Affix affix in candidateRule.Affixes)
            {
                string baseForm = affix.ApplyReverse(currentForm);
                if (baseForm == null)
                {
                    continue;
                }

                // Accept when the base form is a dictionary word that
                // permits the rules used to reach it.
                SpellWord dictionaryEntry;
                if (words.TryGetValue(baseForm, out dictionaryEntry)
                    && dictionaryEntry.CanApplyRules(reversedRule, candidateRule))
                {
                    return true;
                }

                // Only forms derived from the original word are queued, so
                // at most two affixes are ever reversed.
                if (reversedRule == null && !pendingForms.ContainsKey(baseForm))
                {
                    pendingForms.Add(baseForm, candidateRule);
                }
            }
        }

        // This form has been fully examined.
        pendingForms.Remove(currentForm);
    }

    return false;
}
/// <summary>
///     Tells whether the given rule is among this word's affix rules.
/// </summary>
/// <param name="rule">The rule to look for; matched by its Name.</param>
/// <returns>
///     True when an element of AffixRules has the same Name as
///     <paramref name="rule"/>; otherwise false.
/// </returns>
public bool CanApplyRule(AffixRule rule)
{
    char wantedName = rule.Name;

    foreach (AffixRule candidate in AffixRules)
    {
        if (candidate.Name == wantedName)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
///     Loads the word list, replacing any previously loaded words.
/// </summary>
/// <remarks>
///     The first line is skipped. Each following line is a word, optionally
///     followed by '/' and one character per affix rule name. When a word
///     occurs more than once, the first occurrence is kept.
/// </remarks>
/// <param name="path">Path of the words file to read.</param>
private void LoadWordsFile(string path)
{
    // The using block closes the reader even when parsing throws.
    using (StreamReader f = File.OpenText(path))
    {
        words = new Dictionary<string, SpellWord>();

        f.ReadLine(); // read 1st line with words count

        string word;
        while ((word = f.ReadLine()) != null)
        {
            int p = word.IndexOf('/');
            SpellWord spellWord = new SpellWord();
            AffixRulesCollection wordRules = new AffixRulesCollection();

            if (p != -1)
            {
                int n = word.Length;
                for (int i = p + 1; i < n; ++i)
                {
                    char c = word[i];

                    // Flags outside the lookup table, or with no rule
                    // registered, are ignored.
                    if (c < affixRulesLookup.Length)
                    {
                        AffixRule rule = affixRulesLookup[c];
                        if (rule != null)
                        {
                            wordRules.Add(rule);
                        }
                    }
                }

                word = word.Substring(0, p);
            }

            spellWord.AffixRules = wordRules.ToArray<AffixRule>();

            // Duplicate words keep their first entry. Checking explicitly
            // avoids swallowing every exception just to ignore duplicates.
            if (!words.ContainsKey(word))
            {
                words.Add(word, spellWord);
            }
        }
    }
}
/// <summary>
///     Initializes the dictionary by loading and parsing the
///     dictionary file and the user file.
/// </summary>
/// <remarks>
///     A dictionary file name starting with "." is replaced by the current
///     thread culture name, and ".dic" is appended when missing. When no
///     dictionary folder is set, the "Dictionary" folder next to the
///     executing assembly is used.
/// </remarks>
/// <exception cref="FileNotFoundException">
///     The resolved dictionary file does not exist.
/// </exception>
public void Initialize()
{
    // clean up data first
    _baseWords.Clear();
    _replaceCharacters.Clear();
    _prefixRules.Clear();
    _suffixRules.Clear();
    _phoneticRules.Clear();
    _tryCharacters = "";

    // fall back to the current culture name as the dictionary name
    if (_dictionaryFile.StartsWith("."))
    {
        _dictionaryFile = Thread.CurrentThread.CurrentCulture.Name;
    }

    // add the default file extension
    if (!_dictionaryFile.EndsWith(".dic"))
    {
        _dictionaryFile += ".dic";
    }

    // the following is used to split a line by white space
    Regex _spaceRegx = new Regex(@"[^\s]+", RegexOptions.Compiled);
    MatchCollection partMatches;
    string currentSection = "";
    AffixRule currentRule = null;

    // Simplify our lives by assuming the default path
    if (_dictionaryFolder == null)
    {
        _dictionaryFolder = Path.Combine(
            Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location),
            "Dictionary");
    }

    string dictionaryPath = Path.Combine(_dictionaryFolder, _dictionaryFile);

    // Format explicitly: passing (string, string) to Debug.WriteLine selects
    // the (message, category) overload and leaves "{0}" unformatted.
    System.Diagnostics.Debug.WriteLine(string.Format("Loading Dictionary:{0}", dictionaryPath));

    // open dictionary file
    if (!File.Exists(dictionaryPath))
    {
        throw new FileNotFoundException("Cannot find dictonary with location " + dictionaryPath);
    }

    // The using blocks close the stream and reader even when parsing throws.
    using (FileStream fs = new FileStream(dictionaryPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (StreamReader sr = new StreamReader(fs, true))
    {
        // read line by line
        string rawLine;
        while ((rawLine = sr.ReadLine()) != null)
        {
            string tempLine = rawLine.Trim();
            if (tempLine.Length == 0)
            {
                continue;
            }

            // check for section flag
            switch (tempLine)
            {
                case "[Copyright]":
                case "[Try]":
                case "[Replace]":
                case "[Prefix]":
                case "[Suffix]":
                case "[Phonetic]":
                case "[Words]":
                    // set current section that is being parsed
                    currentSection = tempLine;
                    break;

                default:
                    // parse line and place in correct object
                    switch (currentSection)
                    {
                        case "[Copyright]":
                            this.Copyright += tempLine + "\r\n";
                            break;

                        case "[Try]": // ISpell try chars
                            this.TryCharacters += tempLine;
                            break;

                        case "[Replace]": // ISpell replace chars
                            this.ReplaceCharacters.Add(tempLine);
                            break;

                        case "[Prefix]": // MySpell prefix rules
                        case "[Suffix]": // MySpell suffix rules
                            // split line by white space
                            partMatches = _spaceRegx.Matches(tempLine);

                            // if 3 parts, then new rule
                            if (partMatches.Count == 3)
                            {
                                currentRule = new AffixRule();

                                // part 1 = affix key
                                currentRule.Name = partMatches[0].Value;

                                // part 2 = combine flag
                                if (partMatches[1].Value == "Y")
                                {
                                    currentRule.AllowCombine = true;
                                }

                                // part 3 = entry count, not used
                                if (currentSection == "[Prefix]")
                                {
                                    // add to prefix collection
                                    this.PrefixRules.Add(currentRule.Name, currentRule);
                                }
                                else
                                {
                                    // add to suffix collection
                                    this.SuffixRules.Add(currentRule.Name, currentRule);
                                }
                            }
                            // if 4 parts, then entry for current rule
                            else if (partMatches.Count == 4)
                            {
                                // part 1 = affix key; an entry line that appears
                                // before any rule header has no rule to attach to
                                // and is skipped rather than dereferencing null.
                                if (currentRule != null && currentRule.Name == partMatches[0].Value)
                                {
                                    AffixEntry entry = new AffixEntry();

                                    // part 2 = strip char
                                    if (partMatches[1].Value != "0")
                                    {
                                        entry.StripCharacters = partMatches[1].Value;
                                    }

                                    // part 3 = add chars
                                    if (partMatches[2].Value != "0")
                                    {
                                        entry.AddCharacters = partMatches[2].Value;
                                    }

                                    // part 4 = conditions
                                    AffixUtility.EncodeConditions(partMatches[3].Value, entry);
                                    currentRule.AffixEntries.Add(entry);
                                }
                            }

                            break;

                        case "[Phonetic]": // ASpell phonetic rules
                            // split line by white space
                            partMatches = _spaceRegx.Matches(tempLine);
                            if (partMatches.Count >= 2)
                            {
                                PhoneticRule rule = new PhoneticRule();
                                PhoneticUtility.EncodeRule(partMatches[0].Value, ref rule);
                                rule.ReplaceString = partMatches[1].Value;
                                _phoneticRules.Add(rule);
                            }

                            break;

                        case "[Words]": // dictionary word list
                            // splits word into its parts
                            string[] parts = tempLine.Split('/');
                            Word tempWord = new Word();

                            // part 1 = base word
                            tempWord.Text = parts[0];

                            // part 2 = affix keys
                            if (parts.Length >= 2)
                            {
                                tempWord.AffixKeys = parts[1];
                            }

                            // part 3 = phonetic code
                            if (parts.Length >= 3)
                            {
                                tempWord.PhoneticCode = parts[2];
                            }

                            this.BaseWords.Add(tempWord.Text, tempWord);
                            break;
                    } // currentSection switch

                    break;
            } // tempLine switch
        } // read line
    }

    this.LoadUserFile();
    _initialized = true;
}
/// <summary>
///     Initializes the dictionary by loading and parsing the
///     dictionary file and the user file.
/// </summary>
/// <remarks>
///     The dictionary file is read as UTF-8 from the configured dictionary
///     folder. Any trimmed line that starts with "[" and ends with "]"
///     switches the current section; lines in unrecognised sections are
///     ignored.
/// </remarks>
public void Initialize()
{
    // clean up data first
    _baseWords.Clear();
    _replaceCharacters.Clear();
    _prefixRules.Clear();
    _suffixRules.Clear();
    _phoneticRules.Clear();
    _tryCharacters = "";

    // the following is used to split a line by white space
    Regex _spaceRegx = new Regex(@"[^\s]+", RegexOptions.Compiled);
    MatchCollection partMatches;
    string currentSection = "";
    AffixRule currentRule = null;

    string dictionaryPath = Path.Combine(_dictionaryFolder, _dictionaryFile);
    TraceWriter.TraceInfo("Loading Dictionary:{0}", dictionaryPath);

    // open dictionary file
    FileStream fs = null;
    try
    {
        fs = new FileStream(dictionaryPath, FileMode.Open, FileAccess.Read, FileShare.Read);
        using (var sr = new StreamReader(fs, Encoding.UTF8))
        {
            // The reader now owns the stream; clearing the local prevents a
            // second Dispose in the finally block.
            fs = null;

            // read line by line until ReadLine reports the end of the file
            string rawLine;
            while ((rawLine = sr.ReadLine()) != null)
            {
                string tempLine = rawLine.Trim();
                if (tempLine.Length <= 0)
                {
                    continue;
                }

                // check for section flag
                if (tempLine.StartsWith("[") && tempLine.EndsWith("]"))
                {
                    // set current section that is being parsed
                    currentSection = tempLine;
                    continue;
                }

                // parse line and place in correct object
                switch (currentSection)
                {
                    case "[Copyright]":
                        Copyright += tempLine + "\r\n";
                        break;

                    case "[Try]": // ISpell try chars
                        TryCharacters += tempLine;
                        break;

                    case "[Replace]": // ISpell replace chars
                        ReplaceCharacters.Add(tempLine);
                        break;

                    case "[Prefix]": // MySpell prefix rules
                    case "[Suffix]": // MySpell suffix rules
                        // split line by white space
                        partMatches = _spaceRegx.Matches(tempLine);

                        // if 3 parts, then new rule
                        if (partMatches.Count == 3)
                        {
                            currentRule = new AffixRule();

                            // part 1 = affix key
                            currentRule.Name = partMatches[0].Value;

                            // part 2 = combine flag
                            if (partMatches[1].Value == "Y")
                            {
                                currentRule.AllowCombine = true;
                            }

                            // part 3 = entry count, not used
                            if (currentSection == "[Prefix]")
                            {
                                // add to prefix collection
                                PrefixRules.Add(currentRule.Name, currentRule);
                            }
                            else
                            {
                                // add to suffix collection
                                SuffixRules.Add(currentRule.Name, currentRule);
                            }
                        }
                        // if 4 parts, then entry for current rule
                        else if (partMatches.Count == 4)
                        {
                            // part 1 = affix key; an entry line that appears
                            // before any rule header has no rule to attach to
                            // and is skipped rather than dereferencing null.
                            if (currentRule != null && currentRule.Name == partMatches[0].Value)
                            {
                                AffixEntry entry = new AffixEntry();

                                // part 2 = strip char
                                if (partMatches[1].Value != "0")
                                {
                                    entry.StripCharacters = partMatches[1].Value;
                                }

                                // part 3 = add chars
                                entry.AddCharacters = partMatches[2].Value;

                                // part 4 = conditions
                                AffixUtility.EncodeConditions(partMatches[3].Value, entry);
                                currentRule.AffixEntries.Add(entry);
                            }
                        }

                        break;

                    case "[Phonetic]": // ASpell phonetic rules
                        // split line by white space
                        partMatches = _spaceRegx.Matches(tempLine);
                        if (partMatches.Count >= 2)
                        {
                            PhoneticRule rule = new PhoneticRule();
                            PhoneticUtility.EncodeRule(partMatches[0].Value, ref rule);
                            rule.ReplaceString = partMatches[1].Value;
                            _phoneticRules.Add(rule);
                        }

                        break;

                    case "[Words]": // dictionary word list
                        // splits word into its parts
                        string[] parts = tempLine.Split('/');
                        Word tempWord = new Word();

                        // part 1 = base word
                        tempWord.Text = parts[0];

                        // part 2 = affix keys
                        if (parts.Length >= 2)
                        {
                            tempWord.AffixKeys = parts[1];
                        }

                        // part 3 = phonetic code
                        if (parts.Length >= 3)
                        {
                            tempWord.PhoneticCode = parts[2];
                        }

                        BaseWords.Add(tempWord.Text, tempWord);
                        break;
                } // currentSection switch
            } // read line
        }
    }
    finally
    {
        // Only reached with a live stream when the reader was never created.
        if (fs != null)
        {
            fs.Dispose();
        }
    }

    TraceWriter.TraceInfo(
        "Dictionary Loaded BaseWords:{0}; PrefixRules:{1}; SuffixRules:{2}; PhoneticRules:{3}",
        BaseWords.Count, PrefixRules.Count, SuffixRules.Count, PhoneticRules.Count);

    LoadUserFile();
    _initialized = true;
}
/// <summary>
///     Creates a dictionary by parsing sectioned dictionary text from a reader.
/// </summary>
/// <remarks>
///     Each line is trimmed and passed through <c>encode</c> before parsing.
///     Lines in the [Copyright] section are read but not stored. The reader
///     is consumed to the end and is not closed here.
/// </remarks>
/// <param name="inputDictionary">Reader supplying the dictionary text.</param>
public WordDictionary(TextReader inputDictionary)
{
    // Start both translation tables as the identity mapping for codes 0..127.
    for (byte i = 0; i < 128; i++)
    {
        _encodeTable[i] = i;
        _decodeTable[i] = (char)i;
    }

    MatchCollection partMatches;
    string currentSection = "";
    AffixRule currentRule = null;

    // Loop on ReadLine rather than Peek: Peek may return -1 on a reader
    // that cannot seek even though more input is available.
    string rawLine;
    while ((rawLine = inputDictionary.ReadLine()) != null)
    {
        string tempLine = encode(rawLine.Trim());
        if (tempLine.Length == 0)
        {
            continue;
        }

        // check for section flag
        switch (tempLine)
        {
            case "[Copyright]":
            case "[Try]":
            case "[Replace]":
            case "[Prefix]":
            case "[Suffix]":
            case "[Phonetic]":
            case "[Words]":
                // set current section that is being parsed
                currentSection = tempLine;
                break;

            default:
                // parse line and place in correct object
                switch (currentSection)
                {
                    case "[Copyright]":
                        // copyright text is not kept
                        break;

                    case "[Try]": // ISpell try chars
                        _tryCharacters += tempLine;
                        break;

                    case "[Replace]": // ISpell replace chars
                        _replaceCharacters.Add(tempLine);
                        break;

                    case "[Prefix]": // MySpell prefix rules
                    case "[Suffix]": // MySpell suffix rules
                        // split line by white space
                        partMatches = _spaceRegx.Matches(tempLine);

                        // if 3 parts, then new rule
                        if (partMatches.Count == 3)
                        {
                            currentRule = new AffixRule();

                            // part 1 = affix key
                            currentRule.Name = partMatches[0].Value;

                            // part 2 = combine flag
                            if (partMatches[1].Value == "Y")
                            {
                                currentRule.AllowCombine = true;
                            }

                            // part 3 = entry count, not used
                            if (currentSection == "[Prefix]")
                            {
                                // add to prefix collection
                                _prefixRules.Add(currentRule.Name, currentRule);
                            }
                            else
                            {
                                // add to suffix collection
                                _suffixRules.Add(currentRule.Name, currentRule);
                            }
                        }
                        // if 4 parts, then entry for current rule
                        else if (partMatches.Count == 4)
                        {
                            // part 1 = affix key; an entry line that appears
                            // before any rule header has no rule to attach to
                            // and is skipped rather than dereferencing null.
                            if (currentRule != null && currentRule.Name == partMatches[0].Value)
                            {
                                AffixEntry entry = new AffixEntry();

                                // part 2 = strip char
                                if (partMatches[1].Value != "0")
                                {
                                    entry.StripCharacters = partMatches[1].Value;
                                }

                                // part 3 = add chars
                                entry.AddCharacters = partMatches[2].Value;

                                // part 4 = conditions
                                AffixUtility.EncodeConditions(partMatches[3].Value, entry);
                                currentRule.AffixEntries.Add(entry);
                            }
                        }

                        break;

                    case "[Phonetic]": // ASpell phonetic rules
                        // split line by white space
                        partMatches = _spaceRegx.Matches(tempLine);
                        if (partMatches.Count >= 2)
                        {
                            PhoneticRule rule = new PhoneticRule();
                            PhoneticUtility.EncodeRule(partMatches[0].Value, ref rule);
                            rule.ReplaceString = partMatches[1].Value;
                            _phoneticRules.Add(rule);
                        }

                        break;

                    case "[Words]": // dictionary word list
                        // splits word into its parts
                        string[] parts = tempLine.Split('/');
                        Word tempWord = new Word();

                        // part 1 = base word
                        tempWord.Text = parts[0];

                        // part 2 = affix keys
                        if (parts.Length >= 2)
                        {
                            tempWord.AffixKeys = parts[1];
                        }

                        // part 3 = phonetic code
                        if (parts.Length >= 3)
                        {
                            tempWord.PhoneticCode = parts[2];
                        }

                        // indexer assignment: a repeated word replaces the earlier entry
                        _baseWords[tempWord.Text] = tempWord;
                        break;
                }

                break;
        }
    }
}
/// <summary>
///     Builds an AffixRule from the given parts and asserts that its
///     ToString output for the given affix class equals the expected line.
/// </summary>
public void ToString(String affixClass, char affixClassFlag, String strippingCharacters, String affix, String condition, String morphologicalDescription, String expectedLineString)
{
    // Arrange
    AffixRule ruleUnderTest = new AffixRule(
        affixClassFlag,
        strippingCharacters,
        affix,
        condition,
        morphologicalDescription);
    var parsedAffixClass = AffixClass.FromString(affixClass);

    // Act
    var actualLineString = ruleUnderTest.ToString(parsedAffixClass);

    // Assert
    Assert.AreEqual(expectedLineString, actualLineString);
}