/// <summary>
/// Reads the affix file from the provided stream line by line and hands every
/// recognised declaration (prefix, suffix, flag type, alias flag) to its parser,
/// building up the prefix and suffix maps.
/// </summary>
/// <param name="affixStream">Stream to read the content of the affix file from.</param>
/// <param name="encoding">Encoding used to decode the content of the file.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="affixStream"/> or <paramref name="encoding"/> is <c>null</c>.
/// </exception>
/// <exception cref="IOException">Can be thrown while reading from the stream.</exception>
private void ReadAffixFile(Stream affixStream, Encoding encoding)
{
    if (affixStream == null)
    {
        throw new ArgumentNullException("affixStream");
    }

    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    // Disposing the reader at the end of this block also closes affixStream.
    using (var reader = new StreamReader(affixStream, encoding))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // The keys are fixed directive tokens, so they are matched ordinally;
            // the default StartsWith(string) overload is culture-sensitive and its
            // result can vary with the current culture.
            if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal))
            {
                ParseAffix(_prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN);
            }
            else if (line.StartsWith(SUFFIX_KEY, StringComparison.Ordinal))
            {
                ParseAffix(_suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN);
            }
            else if (line.StartsWith(FLAG_KEY, StringComparison.Ordinal))
            {
                // Assume that the FLAG line comes before any prefix or suffixes.
                // Store the strategy so it can be used when parsing the dic file.
                _flagParsingStrategy = GetFlagParsingStrategy(line);
            }
            else if (line.StartsWith(AF_KEY, StringComparison.Ordinal))
            {
                // Parse Alias Flag
                ParseAliasFlag(line, reader);
            }

            // Any other line is ignored.
        }
    }
}
/// <summary>
/// Reads the affix file through the provided stream, building up the prefix and suffix maps.
/// Lines that start with none of the handled keys are skipped.
/// </summary>
/// <param name="affixStream">Stream to read the content of the affix file from.</param>
/// <param name="encoding">Encoding to decode the content of the file.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="affixStream"/> or <paramref name="encoding"/> is <c>null</c>.
/// </exception>
/// <exception cref="IOException">Can be thrown while reading from the stream.</exception>
private void ReadAffixFile(Stream affixStream, Encoding encoding)
{
    if (affixStream == null)
    {
        throw new ArgumentNullException("affixStream");
    }

    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    // The using block disposes the reader, which in turn closes affixStream.
    using (var reader = new StreamReader(affixStream, encoding))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Directive keys are compared with StringComparison.Ordinal: the
            // single-argument StartsWith overload compares using the current
            // culture, which is not appropriate for fixed file-format tokens.
            if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal))
            {
                ParseAffix(_prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN);
            }
            else if (line.StartsWith(SUFFIX_KEY, StringComparison.Ordinal))
            {
                ParseAffix(_suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN);
            }
            else if (line.StartsWith(FLAG_KEY, StringComparison.Ordinal))
            {
                // Assume that the FLAG line comes before any prefix or suffixes.
                // Store the strategy so it can be used when parsing the dic file.
                _flagParsingStrategy = GetFlagParsingStrategy(line);
            }
            else if (line.StartsWith(AF_KEY, StringComparison.Ordinal))
            {
                // Parse Alias Flag
                ParseAliasFlag(line, reader);
            }
        }
    }
}
/// <summary>
/// Reads the affix file through the provided stream, building up the prefix and suffix maps.
/// Besides affixes, it handles alias, flag-type, complex-prefix, circumfix, ignore and
/// input/output conversion declarations, and finally packs every strip string seen while
/// parsing into <c>stripData</c> / <c>stripOffsets</c>.
/// </summary>
/// <param name="affixStream">Stream to read the content of the affix file from.</param>
/// <param name="decoder">Encoding used to decode the content of the file.</param>
/// <exception cref="IOException">Can be thrown while reading from the stream.</exception>
/// <exception cref="FormatException">
/// A CIRCUMFIX, IGNORE, ICONV or OCONV line does not split into exactly two parts,
/// or the count on an ICONV/OCONV line is not a valid integer.
/// </exception>
private void ReadAffixFile(Stream affixStream, Encoding decoder)
{
    SortedDictionary<string, IList<char?>> prefixes = new SortedDictionary<string, IList<char?>>();
    SortedDictionary<string, IList<char?>> suffixes = new SortedDictionary<string, IList<char?>>();
    IDictionary<string, int?> seenPatterns = new Dictionary<string, int?>();

    // zero condition -> 0 ord
    seenPatterns[".*"] = 0;
    patterns.Add(null);

    // zero strip -> 0 ord
    IDictionary<string, int?> seenStrips = new Dictionary<string, int?>();
    seenStrips[""] = 0;

    // NOTE(review): the reader is never disposed here, so affixStream is left open
    // when this method returns - presumably the caller owns the stream; confirm
    // before wrapping this in a using block.
    var reader = new StreamReader(affixStream, decoder);
    string line;
    int lineNumber = 0;
    while ((line = reader.ReadLine()) != null)
    {
        lineNumber++;

        // ignore any BOM marker on first line
        if (lineNumber == 1 && line.StartsWith("\uFEFF", StringComparison.Ordinal))
        {
            line = line.Substring(1);
        }

        if (line.StartsWith(ALIAS_KEY, StringComparison.Ordinal))
        {
            ParseAlias(line);
        }
        else if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal))
        {
            ParseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
        }
        else if (line.StartsWith(SUFFIX_KEY, StringComparison.Ordinal))
        {
            ParseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
        }
        else if (line.StartsWith(FLAG_KEY, StringComparison.Ordinal))
        {
            // Assume that the FLAG line comes before any prefix or suffixes
            // Store the strategy so it can be used when parsing the dic file
            flagParsingStrategy = GetFlagParsingStrategy(line);
        }
        else if (line.Equals(COMPLEXPREFIXES_KEY))
        {
            // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
            complexPrefixes = true;
        }
        else if (line.StartsWith(CIRCUMFIX_KEY, StringComparison.Ordinal))
        {
            string[] parts = whitespacePattern.Split(line);
            if (parts.Length != 2)
            {
                throw new FormatException(string.Format(CultureInfo.InvariantCulture, "Illegal CIRCUMFIX declaration, line {0}", lineNumber));
            }

            circumfix = flagParsingStrategy.ParseFlag(parts[1]);
        }
        else if (line.StartsWith(IGNORE_KEY, StringComparison.Ordinal))
        {
            string[] parts = whitespacePattern.Split(line);
            if (parts.Length != 2)
            {
                throw new FormatException(string.Format(CultureInfo.InvariantCulture, "Illegal IGNORE declaration, line {0}", lineNumber));
            }

            // The ignored characters are kept sorted.
            ignore = parts[1].ToCharArray();
            Array.Sort(ignore);
            needsInputCleaning = true;
        }
        else if (line.StartsWith(ICONV_KEY, StringComparison.Ordinal) || line.StartsWith(OCONV_KEY, StringComparison.Ordinal))
        {
            string[] parts = whitespacePattern.Split(line);
            string type = parts[0];
            if (parts.Length != 2)
            {
                throw new FormatException(string.Format(CultureInfo.InvariantCulture, "Illegal {0} declaration, line {1}", type, lineNumber));
            }

            // parts[1] is the count passed on to ParseConversions, which reads from the same reader.
            int num = int.Parse(parts[1], CultureInfo.InvariantCulture);
            FST<CharsRef> res = ParseConversions(reader, num);
            if (type.Equals("ICONV"))
            {
                iconv = res;
                needsInputCleaning |= iconv != null;
            }
            else
            {
                oconv = res;
                needsOutputCleaning |= oconv != null;
            }
        }
    }

    this.prefixes = AffixFST(prefixes);
    this.suffixes = AffixFST(suffixes);

    // Pack every strip string into one flat char array. stripOffsets[i] is the start
    // of the i-th enumerated strip inside stripData; the extra final slot holds the
    // total length, so strip i spans [stripOffsets[i], stripOffsets[i + 1]).
    int totalChars = 0;
    foreach (string strip in seenStrips.Keys)
    {
        totalChars += strip.Length;
    }

    stripData = new char[totalChars];
    stripOffsets = new int[seenStrips.Count + 1];
    int currentOffset = 0;
    int currentIndex = 0;

    // NOTE(review): slots are assigned in the enumeration order of seenStrips.Keys,
    // and Dictionary<TKey, TValue> does not document an enumeration order. If the
    // values stored in seenStrips are later used as indexes into stripOffsets, this
    // relies on enumeration matching insertion order - confirm against ParseAffix.
    foreach (string strip in seenStrips.Keys)
    {
        stripOffsets[currentIndex++] = currentOffset;
        strip.CopyTo(0, stripData, currentOffset, strip.Length);
        currentOffset += strip.Length;
    }

    Debug.Assert(currentIndex == seenStrips.Count);
    stripOffsets[currentIndex] = currentOffset;
}