Esempio n. 1
0
        /// <summary>
        /// Creates a new suggester.
        /// </summary>
        /// <param name="indexAnalyzer"> Analyzer that will be used for
        ///   analyzing suggestions while building the index. </param>
        /// <param name="queryAnalyzer"> Analyzer that will be used for
        ///   analyzing query text during lookup. </param>
        /// <param name="options"> Bitwise combination of <see cref="SuggesterOptions.EXACT_FIRST"/> and
        ///   <see cref="SuggesterOptions.PRESERVE_SEP"/>; any other flag is rejected. </param>
        /// <param name="maxSurfaceFormsPerAnalyzedForm"> Maximum number of
        ///   surface forms to keep for a single analyzed form.
        ///   When there are too many surface forms we discard the
        ///   lowest weighted ones. Must be &gt; 0 and &lt;= 256. </param>
        /// <param name="maxGraphExpansions"> Maximum number of graph paths
        ///   to expand from the analyzed form.  Set this to -1 for
        ///   no limit; otherwise it must be &gt; 0. </param>
        /// <param name="preservePositionIncrements"> Whether position holes
        ///   should appear in the automata. </param>
        /// <exception cref="ArgumentException"> If <paramref name="options"/> contains a flag other than
        ///   <see cref="SuggesterOptions.EXACT_FIRST"/> or <see cref="SuggesterOptions.PRESERVE_SEP"/>,
        ///   if <paramref name="maxSurfaceFormsPerAnalyzedForm"/> is outside 1..256, or if
        ///   <paramref name="maxGraphExpansions"/> is neither -1 nor a positive number. </exception>
        public AnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, SuggesterOptions options,
                                  int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, bool preservePositionIncrements)
        {
            this.indexAnalyzer = indexAnalyzer;
            this.queryAnalyzer = queryAnalyzer;

            // Reject any bit that is not one of the two supported flags.
            if ((options & ~(SuggesterOptions.EXACT_FIRST | SuggesterOptions.PRESERVE_SEP)) != 0)
            {
                throw new ArgumentException("options should only contain SuggesterOptions.EXACT_FIRST and SuggesterOptions.PRESERVE_SEP; got " +
                                            options);
            }
            this.exactFirst  = (options & SuggesterOptions.EXACT_FIRST) != 0;
            this.preserveSep = (options & SuggesterOptions.PRESERVE_SEP) != 0;

            // NOTE: this is just an implementation limitation; if
            // somehow this is a problem we could fix it by using
            // more than one byte to disambiguate ... but 256 seems
            // like it should be way more than enough.
            if (maxSurfaceFormsPerAnalyzedForm <= 0 || maxSurfaceFormsPerAnalyzedForm > 256)
            {
                // The check accepts 256 itself, so the message states an inclusive upper bound.
                throw new ArgumentException("maxSurfaceFormsPerAnalyzedForm must be > 0 and <= 256 (got: " +
                                            maxSurfaceFormsPerAnalyzedForm + ")");
            }
            this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;

            // -1 is the "no limit" sentinel; every other value must be strictly positive.
            if (maxGraphExpansions < 1 && maxGraphExpansions != -1)
            {
                throw new ArgumentException("maxGraphExpansions must be -1 (no limit) or > 0 (got: " +
                                            maxGraphExpansions + ")");
            }
            this.maxGraphExpansions         = maxGraphExpansions;
            this.preservePositionIncrements = preservePositionIncrements;
        }
Esempio n. 2
0
        /// <summary>
        /// Creates a <see cref="FuzzySuggester"/> instance.
        /// </summary>
        /// <param name="indexAnalyzer"> The <see cref="Analyzer"/> that will be used for
        ///        analyzing suggestions while building the index. </param>
        /// <param name="queryAnalyzer"> The <see cref="Analyzer"/> that will be used for
        ///        analyzing query text during lookup. </param>
        /// <param name="options"> see <see cref="SuggesterOptions.EXACT_FIRST"/>, <see cref="SuggesterOptions.PRESERVE_SEP"/> </param>
        /// <param name="maxSurfaceFormsPerAnalyzedForm"> Maximum number of
        ///        surface forms to keep for a single analyzed form.
        ///        When there are too many surface forms we discard the
        ///        lowest weighted ones. </param>
        /// <param name="maxGraphExpansions"> Maximum number of graph paths
        ///        to expand from the analyzed form.  Set this to -1 for
        ///        no limit. </param>
        /// <param name="preservePositionIncrements"> Whether position holes should appear in the automaton. </param>
        /// <param name="maxEdits"> must be &gt;= 0 and &lt;= <see cref="LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE"/>. </param>
        /// <param name="transpositions"> <c>true</c> if transpositions should be treated as a primitive
        ///        edit operation. If this is false, comparisons will implement the classic
        ///        Levenshtein algorithm. </param>
        /// <param name="nonFuzzyPrefix"> length of common (non-fuzzy) prefix; must be &gt;= 0
        ///        (see default <see cref="DEFAULT_NON_FUZZY_PREFIX"/>). </param>
        /// <param name="minFuzzyLength"> minimum length of lookup key before any edits are allowed; must be &gt;= 0
        ///        (see default <see cref="DEFAULT_MIN_FUZZY_LENGTH"/>). </param>
        /// <param name="unicodeAware"> operate on Unicode code points instead of bytes. </param>
        /// <exception cref="ArgumentException"> If <paramref name="maxEdits"/> is negative or greater than
        ///        <see cref="LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE"/>, or if
        ///        <paramref name="nonFuzzyPrefix"/> or <paramref name="minFuzzyLength"/> is negative. </exception>
        public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, SuggesterOptions options,
                              int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, bool preservePositionIncrements,
                              int maxEdits, bool transpositions, int nonFuzzyPrefix, int minFuzzyLength, bool unicodeAware)
            : base(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements)
        {
            if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)
            {
                throw new ArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
            }
            if (nonFuzzyPrefix < 0)
            {
                // Negative values are rejected, so the requirement is "must be >= 0".
                throw new ArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
            }
            if (minFuzzyLength < 0)
            {
                // Negative values are rejected, so the requirement is "must be >= 0".
                throw new ArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
            }

            this.maxEdits       = maxEdits;
            this.transpositions = transpositions;
            this.nonFuzzyPrefix = nonFuzzyPrefix;
            this.minFuzzyLength = minFuzzyLength;
            this.unicodeAware   = unicodeAware;
        }
        /// <summary>
        /// Creates a <see cref="FuzzySuggester"/> instance.
        /// </summary>
        /// <param name="indexAnalyzer"> The <see cref="Analyzer"/> that will be used for
        ///        analyzing suggestions while building the index. </param>
        /// <param name="queryAnalyzer"> The <see cref="Analyzer"/> that will be used for
        ///        analyzing query text during lookup. </param>
        /// <param name="options"> see <see cref="SuggesterOptions.EXACT_FIRST"/>, <see cref="SuggesterOptions.PRESERVE_SEP"/> </param>
        /// <param name="maxSurfaceFormsPerAnalyzedForm"> Maximum number of
        ///        surface forms to keep for a single analyzed form.
        ///        When there are too many surface forms we discard the
        ///        lowest weighted ones. </param>
        /// <param name="maxGraphExpansions"> Maximum number of graph paths
        ///        to expand from the analyzed form.  Set this to -1 for
        ///        no limit. </param>
        /// <param name="preservePositionIncrements"> Whether position holes should appear in the automaton. </param>
        /// <param name="maxEdits"> must be &gt;= 0 and &lt;= <see cref="LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE"/>. </param>
        /// <param name="transpositions"> <c>true</c> if transpositions should be treated as a primitive
        ///        edit operation. If this is false, comparisons will implement the classic
        ///        Levenshtein algorithm. </param>
        /// <param name="nonFuzzyPrefix"> length of common (non-fuzzy) prefix; must be &gt;= 0
        ///        (see default <see cref="DEFAULT_NON_FUZZY_PREFIX"/>). </param>
        /// <param name="minFuzzyLength"> minimum length of lookup key before any edits are allowed; must be &gt;= 0
        ///        (see default <see cref="DEFAULT_MIN_FUZZY_LENGTH"/>). </param>
        /// <param name="unicodeAware"> operate on Unicode code points instead of bytes. </param>
        /// <exception cref="System.ArgumentException"> If <paramref name="maxEdits"/> is negative or greater than
        ///        <see cref="LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE"/>, or if
        ///        <paramref name="nonFuzzyPrefix"/> or <paramref name="minFuzzyLength"/> is negative. </exception>
        public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, SuggesterOptions options,
            int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, bool preservePositionIncrements,
            int maxEdits, bool transpositions, int nonFuzzyPrefix, int minFuzzyLength, bool unicodeAware)
            : base(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements)
        {
            if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)
            {
                throw new System.ArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
            }
            if (nonFuzzyPrefix < 0)
            {
                // Negative values are rejected, so the requirement is "must be >= 0".
                throw new System.ArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
            }
            if (minFuzzyLength < 0)
            {
                // Negative values are rejected, so the requirement is "must be >= 0".
                throw new System.ArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
            }

            this.maxEdits = maxEdits;
            this.transpositions = transpositions;
            this.nonFuzzyPrefix = nonFuzzyPrefix;
            this.minFuzzyLength = minFuzzyLength;
            this.unicodeAware = unicodeAware;
        }