/// <summary>
/// Reads the dictionary file through the provided Stream, building up the words map.
/// </summary>
/// <remarks>
/// The first line must hold an integer (the number of entries). Every following line is
/// a word, optionally followed by <c>/flags</c>; anything after the first whitespace
/// character that follows the flags (e.g. a morphological description) is ignored.
/// </remarks>
/// <param name="dictionary">Stream to read the dictionary file through.</param>
/// <param name="encoding">Encoding used to decode the contents of the file.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="dictionary"/> or <paramref name="encoding"/> is null.
/// </exception>
/// <exception cref="FormatException">The first line is not a valid integer.</exception>
/// <exception cref="IOException">Can be thrown while reading from the file.</exception>
private void ReadDictionaryFile(Stream dictionary, Encoding encoding)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException("dictionary");
    }

    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    // NOTE(review): the reader is deliberately not disposed here, because disposing it
    // would also close the stream passed in by the caller - confirm the caller closes it.
    var reader = new StreamReader(dictionary, encoding);

    // TODO: don't create millions of strings.
    var line = reader.ReadLine();

    // The first line is the number of entries. The value itself is not used; it is parsed
    // (culture-independently, since this is file data) only so a malformed header fails fast.
    Int32.Parse(line, System.Globalization.CultureInfo.InvariantCulture);

    var hasAliases = _aliases.Count > 0;

    // TODO: the flags themselves can be double-chars (long) or also numeric;
    // either way the trick is to encode them as char... but they must be parsed differently.
    while ((line = reader.ReadLine()) != null)
    {
        String entry;
        HunspellWord wordForm;

        var flagSep = line.LastIndexOf('/');
        if (flagSep == -1)
        {
            wordForm = NoFlags;
            entry = line;
        }
        else
        {
            // There can be comments (a morphological description) after the flags,
            // separated by a tab or by spaces, so the flag field ends at the first
            // whitespace character following the separator.
            var end = -1;
            for (var i = flagSep + 1; i < line.Length; i++)
            {
                if (Char.IsWhiteSpace(line[i]))
                {
                    end = i;
                    break;
                }
            }

            var cflag = end == -1
                ? line.Substring(flagSep + 1)
                : line.Substring(flagSep + 1, end - flagSep - 1);

            // When aliases are defined, the flag field is used as the alias key;
            // otherwise it is decoded by the configured flag parsing strategy.
            wordForm = new HunspellWord(hasAliases ? _aliases[cflag] : _flagParsingStrategy.ParseFlags(cflag));
            entry = line.Substring(0, flagSep);
        }

        // Several word forms may share the same surface entry, so each entry maps to a list.
        List<HunspellWord> entries;
        if (!_words.TryGetValue(entry, out entries))
        {
            entries = new List<HunspellWord>();
            _words.Add(entry, entries);
        }

        entries.Add(wordForm);
    }
}
/// <summary>
/// Reads the dictionary file through the provided Stream, building up the words map.
/// </summary>
/// <remarks>
/// The first line must hold an integer (the number of entries). Every following line is
/// a word, optionally followed by <c>/flags</c>; anything after the first whitespace
/// character that follows the flags (e.g. a morphological description) is ignored.
/// When aliases are defined, the flag field is read as a one-based index into them.
/// </remarks>
/// <param name="dictionary">Stream to read the dictionary file through.</param>
/// <param name="encoding">Encoding used to decode the contents of the file.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="dictionary"/> or <paramref name="encoding"/> is null.
/// </exception>
/// <exception cref="FormatException">
/// The first line is not a valid integer, or aliases are in use and a flag field is not a valid integer.
/// </exception>
/// <exception cref="IOException">Can be thrown while reading from the file.</exception>
private void ReadDictionaryFile(Stream dictionary, Encoding encoding)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException("dictionary");
    }

    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    // NOTE(review): the reader is deliberately not disposed here, because disposing it
    // would also close the stream passed in by the caller - confirm the caller closes it.
    var reader = new StreamReader(dictionary, encoding);
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // TODO: don't create millions of strings.
    var line = reader.ReadLine();

    // The first line is the number of entries. The value itself is not used; it is parsed
    // (culture-independently, since this is file data) only so a malformed header fails fast.
    Int32.Parse(line, invariant);

    // The alias table is not modified while reading, so check it once up front.
    var hasAliases = _alias.Count > 0;

    // TODO: the flags themselves can be double-chars (long) or also numeric;
    // either way the trick is to encode them as char... but they must be parsed differently.
    while ((line = reader.ReadLine()) != null)
    {
        String entry;
        HunspellWord wordForm;

        var flagSep = line.LastIndexOf('/');
        if (flagSep == -1)
        {
            wordForm = NoFlags;
            entry = line;
        }
        else
        {
            // There can be comments (a morphological description) after the flags,
            // separated by a tab or by spaces, so the flag field ends at the first
            // whitespace character following the separator.
            var end = -1;
            for (var i = flagSep + 1; i < line.Length; i++)
            {
                if (Char.IsWhiteSpace(line[i]))
                {
                    end = i;
                    break;
                }
            }

            var cflag = end == -1
                ? line.Substring(flagSep + 1)
                : line.Substring(flagSep + 1, end - flagSep - 1);

            // With aliases, the flag field is a one-based alias number; otherwise it is
            // decoded by the configured flag parsing strategy.
            wordForm = new HunspellWord(hasAliases
                ? _alias[int.Parse(cflag, invariant) - 1]
                : _flagParsingStrategy.ParseFlags(cflag));
            entry = line.Substring(0, flagSep);
        }

        // Several word forms may share the same surface entry, so each entry maps to a list.
        List<HunspellWord> entries;
        if (!_words.TryGetValue(entry, out entries))
        {
            entries = new List<HunspellWord>();
            _words.Add(entry, entries);
        }

        entries.Add(wordForm);
    }
}