/// <summary>
        ///   Reads the affix file through the provided Stream, building up the prefix and suffix maps.
        ///   Each line is dispatched on its leading keyword: prefix and suffix lines go to
        ///   <c>ParseAffix</c>, the FLAG line selects the flag parsing strategy, and AF lines go to
        ///   <c>ParseAliasFlag</c>. Lines starting with any other keyword are ignored.
        /// </summary>
        /// <remarks>
        ///   The stream is wrapped in a <see cref="StreamReader"/> that is disposed when parsing ends,
        ///   which also closes <paramref name="affixStream"/>.
        /// </remarks>
        /// <param name="affixStream">Stream to read the content of the affix file from.</param>
        /// <param name="encoding">Encoding to decode the content of the file.</param>
        /// <exception cref="ArgumentNullException">
        ///   <paramref name="affixStream"/> or <paramref name="encoding"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="IOException">IOException Can be thrown while reading from the Stream.</exception>
        private void ReadAffixFile(Stream affixStream, Encoding encoding)
        {
            if (affixStream == null)
            {
                throw new ArgumentNullException("affixStream");
            }
            if (encoding == null)
            {
                throw new ArgumentNullException("encoding");
            }

            using (var reader = new StreamReader(affixStream, encoding))
            {
                String line;
                while ((line = reader.ReadLine()) != null)
                {
                    // The keywords are fixed ASCII tokens of the file format, so they are matched
                    // ordinally; a culture-sensitive match would depend on the current thread culture.
                    if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal))
                    {
                        // The reader is handed over as well; presumably ParseAffix consumes the
                        // rule lines that follow the header line.
                        ParseAffix(_prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN);
                    }
                    else if (line.StartsWith(SUFFIX_KEY, StringComparison.Ordinal))
                    {
                        ParseAffix(_suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN);
                    }
                    else if (line.StartsWith(FLAG_KEY, StringComparison.Ordinal))
                    {
                        // Assume that the FLAG line comes before any prefix or suffixes
                        // Store the strategy so it can be used when parsing the dic file
                        _flagParsingStrategy = GetFlagParsingStrategy(line);
                    }
                    else if (line.StartsWith(AF_KEY, StringComparison.Ordinal))
                    {
                        // Parse Alias Flag
                        ParseAliasFlag(line, reader);
                    }
                }
            }
        }
        /// <summary>
        ///   Reads the affix file through the provided Stream, building up the prefix and suffix maps.
        ///   Lines are routed by their leading keyword (prefix, suffix, FLAG or AF); lines that start
        ///   with none of these keywords are skipped.
        /// </summary>
        /// <remarks>
        ///   The <see cref="StreamReader"/> created here is disposed on exit, which also closes
        ///   <paramref name="affixStream"/>.
        /// </remarks>
        /// <param name="affixStream">Stream to read the content of the affix file from.</param>
        /// <param name="encoding">Encoding to decode the content of the file.</param>
        /// <exception cref="ArgumentNullException">
        ///   <paramref name="affixStream"/> or <paramref name="encoding"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="IOException">IOException Can be thrown while reading from the Stream.</exception>
        private void ReadAffixFile(Stream affixStream, Encoding encoding) {
            if (affixStream == null) throw new ArgumentNullException("affixStream");
            if (encoding == null) throw new ArgumentNullException("encoding");

            using (var reader = new StreamReader(affixStream, encoding)) {
                String line;
                while ((line = reader.ReadLine()) != null) {
                    // Keywords are fixed tokens of the file format: compare ordinally so the result
                    // does not vary with the current thread culture.
                    if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal)) {
                        ParseAffix(_prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN);
                    } else if (line.StartsWith(SUFFIX_KEY, StringComparison.Ordinal)) {
                        ParseAffix(_suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN);
                    } else if (line.StartsWith(FLAG_KEY, StringComparison.Ordinal)) {
                        // Assume that the FLAG line comes before any prefix or suffixes
                        // Store the strategy so it can be used when parsing the dic file
                        _flagParsingStrategy = GetFlagParsingStrategy(line);
                    } else if (line.StartsWith(AF_KEY, StringComparison.Ordinal)) {
                        // Parse Alias Flag
                        ParseAliasFlag(line, reader);
                    }
                }
            }
        }
예제 #3
0
파일: Dictionary.cs 프로젝트: wwb/lucenenet
        /// <summary>
        /// Reads the affix file through the provided stream, building up the prefix and suffix maps
        /// and recording the options the file declares (alias, flag strategy, complex prefixes,
        /// circumfix flag, ignored characters and input/output conversions). After the last line,
        /// the collected strip strings are packed into <c>stripData</c> / <c>stripOffsets</c>.
        /// </summary>
        /// <param name="affixStream"> Stream to read the content of the affix file from </param>
        /// <param name="decoder"> Encoding used to decode the content of the file </param>
        /// <exception cref="IOException"> Can be thrown while reading from the stream </exception>
        /// <exception cref="FormatException"> A CIRCUMFIX, IGNORE, ICONV or OCONV declaration does not
        /// split into exactly two parts, or the ICONV/OCONV count is not a valid integer </exception>
        private void ReadAffixFile(Stream affixStream, Encoding decoder)
        {
            SortedDictionary<string, IList<char?>> prefixes = new SortedDictionary<string, IList<char?>>();
            SortedDictionary<string, IList<char?>> suffixes = new SortedDictionary<string, IList<char?>>();
            IDictionary<string, int?> seenPatterns = new Dictionary<string, int?>();

            // zero condition -> 0 ord
            seenPatterns[".*"] = 0;
            patterns.Add(null);

            // zero strip -> 0 ord
            IDictionary<string, int?> seenStrips = new Dictionary<string, int?>();
            seenStrips[""] = 0;

            // The reader is not disposed here: disposing a StreamReader also closes the stream it wraps.
            // NOTE(review): confirm that the caller is responsible for closing affixStream.
            var reader = new StreamReader(affixStream, decoder);
            string line = null;
            int lineNumber = 0;

            while ((line = reader.ReadLine()) != null)
            {
                lineNumber++;
                // ignore any BOM marker on first line
                if (lineNumber == 1 && line.StartsWith("\uFEFF", StringComparison.Ordinal))
                {
                    line = line.Substring(1);
                }
                if (line.StartsWith(ALIAS_KEY, StringComparison.Ordinal))
                {
                    ParseAlias(line);
                }
                else if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal))
                {
                    ParseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
                }
                else if (line.StartsWith(SUFFIX_KEY, StringComparison.Ordinal))
                {
                    ParseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
                }
                else if (line.StartsWith(FLAG_KEY, StringComparison.Ordinal))
                {
                    // Assume that the FLAG line comes before any prefix or suffixes
                    // Store the strategy so it can be used when parsing the dic file
                    flagParsingStrategy = GetFlagParsingStrategy(line);
                }
                else if (line.Equals(COMPLEXPREFIXES_KEY))
                {
                    complexPrefixes = true; // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
                }
                else if (line.StartsWith(CIRCUMFIX_KEY, StringComparison.Ordinal))
                {
                    string[] parts = whitespacePattern.Split(line);
                    if (parts.Length != 2)
                    {
                        throw new FormatException(string.Format(CultureInfo.InvariantCulture, "Illegal CIRCUMFIX declaration, line {0}", lineNumber));
                    }
                    circumfix = flagParsingStrategy.ParseFlag(parts[1]);
                }
                else if (line.StartsWith(IGNORE_KEY, StringComparison.Ordinal))
                {
                    string[] parts = whitespacePattern.Split(line);
                    if (parts.Length != 2)
                    {
                        throw new FormatException(string.Format(CultureInfo.InvariantCulture, "Illegal IGNORE declaration, line {0}", lineNumber));
                    }
                    // Keep the ignored characters sorted (presumably so they can be binary-searched later).
                    ignore = parts[1].ToCharArray();
                    Array.Sort(ignore);
                    needsInputCleaning = true;
                }
                else if (line.StartsWith(ICONV_KEY, StringComparison.Ordinal) || line.StartsWith(OCONV_KEY, StringComparison.Ordinal))
                {
                    string[] parts = whitespacePattern.Split(line);
                    string type = parts[0];
                    if (parts.Length != 2)
                    {
                        throw new FormatException(string.Format(CultureInfo.InvariantCulture, "Illegal {0} declaration, line {1}", type, lineNumber));
                    }
                    // The second token is the number of conversion entries handed to ParseConversions.
                    int num = int.Parse(parts[1], CultureInfo.InvariantCulture);
                    FST<CharsRef> res = ParseConversions(reader, num);
                    if (type.Equals("ICONV"))
                    {
                        iconv = res;
                        needsInputCleaning |= iconv != null;
                    }
                    else
                    {
                        oconv = res;
                        needsOutputCleaning |= oconv != null;
                    }
                }
            }

            this.prefixes = AffixFST(prefixes);
            this.suffixes = AffixFST(suffixes);

            // Pack the strips in ord order. Dictionary<TKey, TValue> leaves its enumeration order
            // unspecified, so the entries are sorted by the ord stored as their value instead of
            // relying on the order in which the keys happen to be enumerated.
            List<KeyValuePair<string, int?>> stripsByOrd = new List<KeyValuePair<string, int?>>(seenStrips);
            stripsByOrd.Sort((x, y) => Nullable.Compare(x.Value, y.Value));

            int totalChars = 0;
            foreach (KeyValuePair<string, int?> entry in stripsByOrd)
            {
                totalChars += entry.Key.Length;
            }

            // stripData holds all strips back to back; stripOffsets[i] is where the i-th strip starts,
            // and the extra trailing slot holds the end offset of the last strip.
            stripData = new char[totalChars];
            stripOffsets = new int[stripsByOrd.Count + 1];
            int currentOffset = 0;
            int currentIndex = 0;

            foreach (KeyValuePair<string, int?> entry in stripsByOrd)
            {
                string strip = entry.Key;
                stripOffsets[currentIndex++] = currentOffset;
                strip.CopyTo(0, stripData, currentOffset, strip.Length);
                currentOffset += strip.Length;
            }
            Debug.Assert(currentIndex == seenStrips.Count);
            stripOffsets[currentIndex] = currentOffset;
        }