/// <summary>
/// Initializes the dictionary by loading and parsing the
/// dictionary file and the user file.
/// </summary>
/// <remarks>
/// The dictionary file is read as UTF-8 text from the path formed by combining
/// <c>_dictionaryFolder</c> and <c>_dictionaryFile</c>. Any trimmed line that starts
/// with '[' and ends with ']' selects the current section; every other non-empty
/// line is parsed according to that section. Lines in unrecognized sections are
/// skipped. An affix entry line (four parts) that appears before any affix rule
/// header (three parts) is ignored.
/// </remarks>
public void Initialize()
{
    // clean up data first
    _baseWords.Clear();
    _replaceCharacters.Clear();
    _prefixRules.Clear();
    _suffixRules.Clear();
    _phoneticRules.Clear();
    _tryCharacters = "";
    // NOTE(review): Copyright is appended to below but is not reset here, so
    // repeated Initialize() calls accumulate its text - confirm this is intended.

    // matches each run of non-whitespace characters, i.e. splits a line by white space
    Regex spaceRegex = new Regex(@"[^\s]+", RegexOptions.Compiled);
    MatchCollection partMatches;

    string currentSection = "";
    AffixRule currentRule = null;

    string dictionaryPath = Path.Combine(_dictionaryFolder, _dictionaryFile);
    TraceWriter.TraceInfo("Loading Dictionary:{0}", dictionaryPath);

    // open dictionary file
    FileStream fs = null;
    try
    {
        fs = new FileStream(dictionaryPath, FileMode.Open, FileAccess.Read, FileShare.Read);
        using (var sr = new StreamReader(fs, Encoding.UTF8))
        {
            // The reader now owns the stream and disposes it; clearing the local
            // keeps the finally block from disposing the stream a second time.
            fs = null;

            // read line by line; ReadLine returns null at end of input
            string rawLine;
            while ((rawLine = sr.ReadLine()) != null)
            {
                string tempLine = rawLine.Trim();
                if (tempLine.Length <= 0)
                {
                    continue;
                }

                // check for section flag
                if (tempLine.StartsWith("[", StringComparison.Ordinal)
                    && tempLine.EndsWith("]", StringComparison.Ordinal))
                {
                    // set current section that is being parsed
                    currentSection = tempLine;
                    continue;
                }

                // parse line and place in correct object
                switch (currentSection)
                {
                    case "[Copyright]":
                        Copyright += tempLine + "\r\n";
                        break;

                    case "[Try]": // ISpell try chars
                        TryCharacters += tempLine;
                        break;

                    case "[Replace]": // ISpell replace chars
                        ReplaceCharacters.Add(tempLine);
                        break;

                    case "[Prefix]": // MySpell prefix rules
                    case "[Suffix]": // MySpell suffix rules
                        // split line by white space
                        partMatches = spaceRegex.Matches(tempLine);

                        if (partMatches.Count == 3)
                        {
                            // 3 parts: header of a new rule
                            currentRule = new AffixRule();

                            // part 1 = affix key
                            currentRule.Name = partMatches[0].Value;

                            // part 2 = combine flag
                            if (partMatches[1].Value == "Y")
                            {
                                currentRule.AllowCombine = true;
                            }

                            // part 3 = entry count, not used
                            if (currentSection == "[Prefix]")
                            {
                                // add to prefix collection
                                PrefixRules.Add(currentRule.Name, currentRule);
                            }
                            else
                            {
                                // add to suffix collection
                                SuffixRules.Add(currentRule.Name, currentRule);
                            }
                        }
                        else if (partMatches.Count == 4)
                        {
                            // 4 parts: entry for the current rule.
                            // part 1 = affix key; it must match the rule being built.
                            // The null check skips an entry that precedes any rule
                            // header instead of failing with a NullReferenceException.
                            if (currentRule != null && currentRule.Name == partMatches[0].Value)
                            {
                                AffixEntry entry = new AffixEntry();

                                // part 2 = strip char ("0" means nothing to strip)
                                if (partMatches[1].Value != "0")
                                {
                                    entry.StripCharacters = partMatches[1].Value;
                                }

                                // part 3 = add chars
                                entry.AddCharacters = partMatches[2].Value;

                                // part 4 = conditions
                                AffixUtility.EncodeConditions(partMatches[3].Value, entry);

                                currentRule.AffixEntries.Add(entry);
                            }
                        }
                        break;

                    case "[Phonetic]": // ASpell phonetic rules
                        // split line by white space
                        partMatches = spaceRegex.Matches(tempLine);
                        if (partMatches.Count >= 2)
                        {
                            PhoneticRule rule = new PhoneticRule();
                            PhoneticUtility.EncodeRule(partMatches[0].Value, ref rule);
                            rule.ReplaceString = partMatches[1].Value;
                            _phoneticRules.Add(rule);
                        }
                        break;

                    case "[Words]": // dictionary word list
                        // splits word into its parts
                        string[] parts = tempLine.Split('/');
                        Word tempWord = new Word();

                        // part 1 = base word
                        tempWord.Text = parts[0];

                        // part 2 = affix keys
                        if (parts.Length >= 2)
                        {
                            tempWord.AffixKeys = parts[1];
                        }

                        // part 3 = phonetic code
                        if (parts.Length >= 3)
                        {
                            tempWord.PhoneticCode = parts[2];
                        }

                        BaseWords.Add(tempWord.Text, tempWord);
                        break;
                } // currentSection switch
            } // read line
        }
    }
    finally
    {
        // only reached with a live stream if the StreamReader was never constructed
        if (fs != null)
        {
            fs.Dispose();
        }
    }

    TraceWriter.TraceInfo("Dictionary Loaded BaseWords:{0}; PrefixRules:{1}; SuffixRules:{2}; PhoneticRules:{3}",
        BaseWords.Count, PrefixRules.Count, SuffixRules.Count, PhoneticRules.Count);

    LoadUserFile();

    _initialized = true;
}
/// <summary>
/// Initializes the dictionary by loading and parsing the
/// dictionary file and the user file.
/// </summary>
/// <remarks>
/// Before loading, the dictionary file name is normalized: a name starting with
/// "." is replaced by the current thread culture's name, and ".dic" is appended
/// when missing. When no dictionary folder is set, a "Dictionary" folder next to
/// the executing assembly is used. Both normalized values are written back to
/// <c>_dictionaryFile</c> and <c>_dictionaryFolder</c>. An affix entry line
/// (four parts) that appears before any affix rule header (three parts) is ignored.
/// </remarks>
/// <exception cref="FileNotFoundException">
/// The resolved dictionary path does not exist.
/// </exception>
public void Initialize()
{
    // clean up data first
    _baseWords.Clear();
    _replaceCharacters.Clear();
    _prefixRules.Clear();
    _suffixRules.Clear();
    _phoneticRules.Clear();
    _tryCharacters = "";

    // a file name starting with "." falls back to the current culture name
    if (_dictionaryFile.StartsWith(".", StringComparison.Ordinal))
    {
        _dictionaryFile = Thread.CurrentThread.CurrentCulture.Name;
    }

    // add the default file extension
    if (!_dictionaryFile.EndsWith(".dic", StringComparison.Ordinal))
    {
        _dictionaryFile += ".dic";
    }

    // matches each run of non-whitespace characters, i.e. splits a line by white space
    Regex spaceRegex = new Regex(@"[^\s]+", RegexOptions.Compiled);
    MatchCollection partMatches;

    string currentSection = "";
    AffixRule currentRule = null;

    // Simplify our lives by assuming the default path
    if (_dictionaryFolder == null)
    {
        _dictionaryFolder = Path.Combine(
            Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location),
            "Dictionary");
    }

    string dictionaryPath = Path.Combine(_dictionaryFolder, _dictionaryFile);

    // Format explicitly: Debug.WriteLine(string, string) is the (message, category)
    // overload, so passing the path as a second argument would not fill in {0}.
    System.Diagnostics.Debug.WriteLine(string.Format("Loading Dictionary:{0}", dictionaryPath));

    // open dictionary file
    if (!File.Exists(dictionaryPath))
    {
        throw new FileNotFoundException("Cannot find dictonary with location " + dictionaryPath);
    }

    // using blocks release the file handle even when parsing throws
    using (FileStream fs = new FileStream(dictionaryPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (StreamReader sr = new StreamReader(fs, true))
    {
        // read line by line; ReadLine returns null at end of input
        string rawLine;
        while ((rawLine = sr.ReadLine()) != null)
        {
            string tempLine = rawLine.Trim();
            if (tempLine.Length == 0)
            {
                continue;
            }

            // check for section flag
            switch (tempLine)
            {
                case "[Copyright]":
                case "[Try]":
                case "[Replace]":
                case "[Prefix]":
                case "[Suffix]":
                case "[Phonetic]":
                case "[Words]":
                    // set current section that is being parsed
                    currentSection = tempLine;
                    continue;
            }

            // parse line and place in correct object
            switch (currentSection)
            {
                case "[Copyright]":
                    this.Copyright += tempLine + "\r\n";
                    break;

                case "[Try]": // ISpell try chars
                    this.TryCharacters += tempLine;
                    break;

                case "[Replace]": // ISpell replace chars
                    this.ReplaceCharacters.Add(tempLine);
                    break;

                case "[Prefix]": // MySpell prefix rules
                case "[Suffix]": // MySpell suffix rules
                    // split line by white space
                    partMatches = spaceRegex.Matches(tempLine);

                    if (partMatches.Count == 3)
                    {
                        // 3 parts: header of a new rule
                        currentRule = new AffixRule();

                        // part 1 = affix key
                        currentRule.Name = partMatches[0].Value;

                        // part 2 = combine flag
                        if (partMatches[1].Value == "Y")
                        {
                            currentRule.AllowCombine = true;
                        }

                        // part 3 = entry count, not used
                        if (currentSection == "[Prefix]")
                        {
                            // add to prefix collection
                            this.PrefixRules.Add(currentRule.Name, currentRule);
                        }
                        else
                        {
                            // add to suffix collection
                            this.SuffixRules.Add(currentRule.Name, currentRule);
                        }
                    }
                    else if (partMatches.Count == 4)
                    {
                        // 4 parts: entry for the current rule.
                        // part 1 = affix key; it must match the rule being built.
                        // The null check skips an entry that precedes any rule
                        // header instead of failing with a NullReferenceException.
                        if (currentRule != null && currentRule.Name == partMatches[0].Value)
                        {
                            AffixEntry entry = new AffixEntry();

                            // part 2 = strip char ("0" means nothing to strip)
                            if (partMatches[1].Value != "0")
                            {
                                entry.StripCharacters = partMatches[1].Value;
                            }

                            // part 3 = add chars ("0" means nothing to add)
                            if (partMatches[2].Value != "0")
                            {
                                entry.AddCharacters = partMatches[2].Value;
                            }

                            // part 4 = conditions
                            AffixUtility.EncodeConditions(partMatches[3].Value, entry);

                            currentRule.AffixEntries.Add(entry);
                        }
                    }
                    break;

                case "[Phonetic]": // ASpell phonetic rules
                    // split line by white space
                    partMatches = spaceRegex.Matches(tempLine);
                    if (partMatches.Count >= 2)
                    {
                        PhoneticRule rule = new PhoneticRule();
                        PhoneticUtility.EncodeRule(partMatches[0].Value, ref rule);
                        rule.ReplaceString = partMatches[1].Value;
                        _phoneticRules.Add(rule);
                    }
                    break;

                case "[Words]": // dictionary word list
                    // splits word into its parts
                    string[] parts = tempLine.Split('/');
                    Word tempWord = new Word();

                    // part 1 = base word
                    tempWord.Text = parts[0];

                    // part 2 = affix keys
                    if (parts.Length >= 2)
                    {
                        tempWord.AffixKeys = parts[1];
                    }

                    // part 3 = phonetic code
                    if (parts.Length >= 3)
                    {
                        tempWord.PhoneticCode = parts[2];
                    }

                    this.BaseWords.Add(tempWord.Text, tempWord);
                    break;
            } // currentSection switch
        } // read line
    } // reader and stream are closed here

    this.LoadUserFile();

    _initialized = true;
}
/// <summary>
/// Creates a dictionary by parsing sectioned dictionary text read from
/// <paramref name="inputDictionary"/>.
/// </summary>
/// <remarks>
/// The first 128 entries of the encode and decode tables are set to the identity
/// mapping before parsing. Each line is trimmed and passed through <c>encode</c>.
/// Only the section headers listed in the first switch change the current section;
/// other lines are parsed according to it. Lines in the [Copyright] section are
/// skipped. An affix entry line (four parts) that appears before any affix rule
/// header (three parts) is ignored. The reader is not closed by this constructor.
/// </remarks>
/// <param name="inputDictionary">Reader supplying the dictionary text, one entry per line.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="inputDictionary"/> is null.
/// </exception>
public WordDictionary(TextReader inputDictionary)
{
    if (inputDictionary == null)
    {
        throw new ArgumentNullException("inputDictionary");
    }

    // identity mapping for the first 128 code points
    for (byte i = 0; i < 128; i++)
    {
        _encodeTable[i] = i;
        _decodeTable[i] = (char)i;
    }

    MatchCollection partMatches;
    string currentSection = "";
    AffixRule currentRule = null;

    // Loop on ReadLine rather than Peek: TextReader.Peek returns -1 for readers
    // that do not support peeking, which would end the load before any line is read.
    string rawLine;
    while ((rawLine = inputDictionary.ReadLine()) != null)
    {
        string tempLine = encode(rawLine.Trim());
        if (tempLine.Length == 0)
        {
            continue;
        }

        // check for section flag
        switch (tempLine)
        {
            case "[Copyright]":
            case "[Try]":
            case "[Replace]":
            case "[Prefix]":
            case "[Suffix]":
            case "[Phonetic]":
            case "[Words]":
                // set current section that is being parsed
                currentSection = tempLine;
                continue;
        }

        // parse line and place in correct object
        switch (currentSection)
        {
            case "[Copyright]":
                // copyright text is not stored by this constructor
                break;

            case "[Try]": // ISpell try chars
                _tryCharacters += tempLine;
                break;

            case "[Replace]": // ISpell replace chars
                _replaceCharacters.Add(tempLine);
                break;

            case "[Prefix]": // MySpell prefix rules
            case "[Suffix]": // MySpell suffix rules
                // split line by white space
                partMatches = _spaceRegx.Matches(tempLine);

                if (partMatches.Count == 3)
                {
                    // 3 parts: header of a new rule
                    currentRule = new AffixRule();

                    // part 1 = affix key
                    currentRule.Name = partMatches[0].Value;

                    // part 2 = combine flag
                    if (partMatches[1].Value == "Y")
                    {
                        currentRule.AllowCombine = true;
                    }

                    // part 3 = entry count, not used
                    if (currentSection == "[Prefix]")
                    {
                        // add to prefix collection
                        _prefixRules.Add(currentRule.Name, currentRule);
                    }
                    else
                    {
                        // add to suffix collection
                        _suffixRules.Add(currentRule.Name, currentRule);
                    }
                }
                else if (partMatches.Count == 4)
                {
                    // 4 parts: entry for the current rule.
                    // part 1 = affix key; it must match the rule being built.
                    // The null check skips an entry that precedes any rule
                    // header instead of failing with a NullReferenceException.
                    if (currentRule != null && currentRule.Name == partMatches[0].Value)
                    {
                        AffixEntry entry = new AffixEntry();

                        // part 2 = strip char ("0" means nothing to strip)
                        if (partMatches[1].Value != "0")
                        {
                            entry.StripCharacters = partMatches[1].Value;
                        }

                        // part 3 = add chars
                        entry.AddCharacters = partMatches[2].Value;

                        // part 4 = conditions
                        AffixUtility.EncodeConditions(partMatches[3].Value, entry);

                        currentRule.AffixEntries.Add(entry);
                    }
                }
                break;

            case "[Phonetic]": // ASpell phonetic rules
                // split line by white space
                partMatches = _spaceRegx.Matches(tempLine);
                if (partMatches.Count >= 2)
                {
                    PhoneticRule rule = new PhoneticRule();
                    PhoneticUtility.EncodeRule(partMatches[0].Value, ref rule);
                    rule.ReplaceString = partMatches[1].Value;
                    _phoneticRules.Add(rule);
                }
                break;

            case "[Words]": // dictionary word list
                // splits word into its parts
                string[] parts = tempLine.Split('/');
                Word tempWord = new Word();

                // part 1 = base word
                tempWord.Text = parts[0];

                // part 2 = affix keys
                if (parts.Length >= 2)
                {
                    tempWord.AffixKeys = parts[1];
                }

                // part 3 = phonetic code
                if (parts.Length >= 3)
                {
                    tempWord.PhoneticCode = parts[2];
                }

                // indexer assignment: a repeated word replaces the earlier entry
                _baseWords[tempWord.Text] = tempWord;
                break;
        } // currentSection switch
    } // read line
}