Esempio n. 1
0
        /// <summary>
        /// Builds the final automaton from the entries produced by <paramref name="sorter"/>.
        /// </summary>
        /// <remarks>
        /// An entry is added only when it differs from the most recently added one, so
        /// duplicates are dropped as long as equal entries arrive next to each other
        /// (presumably guaranteed by the sorter's ordering; verify against the sorter).
        /// </remarks>
        /// <param name="sorter">Source of the entries to add to the automaton.</param>
        /// <returns>
        /// The finished automaton, or <c>null</c> when the sorter produced no entries at all.
        /// </returns>
        private FST<object> BuildAutomaton(BytesRefSorter sorter)
        {
            // The automaton carries no outputs; every input maps to the shared "no output" value.
            Outputs<object> noOutputs = NoOutputs.Singleton;
            object noOutput = noOutputs.NoOutput;
            Builder<object> fstBuilder = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength, noOutputs, null, false, PackedInts.DEFAULT, true, 15);

            // Copy of the last entry handed to the builder, used to detect repeats.
            BytesRef lastAdded = new BytesRef();
            // Reusable conversion buffer for ToIntsRef.
            IntsRef intsScratch = new IntsRef();
            int seen = 0;
            BytesRefIterator entries = sorter.GetEnumerator();

            for (BytesRef current = entries.Next(); current != null; current = entries.Next())
            {
                seen++;

                if (lastAdded.CompareTo(current) == 0)
                {
                    // Same as the previously added entry: nothing new to add.
                    continue;
                }

                fstBuilder.Add(Util.Fst.Util.ToIntsRef(current, intsScratch), noOutput);
                lastAdded.CopyBytes(current);
            }

            if (seen == 0)
            {
                return null;
            }

            return fstBuilder.Finish();
        }
Esempio n. 2
0
        /// <summary>
        /// Creates an FSTCompletion builder with the specified options.
        /// </summary>
        /// <param name="buckets">
        ///          The number of buckets for weight discretization; must be between 1 and
        ///          255 inclusive. Bucket values supplied when adding entries
        ///          (<c>Add(BytesRef, int)</c>) must be smaller than the number given here.
        /// </param>
        /// <param name="sorter">
        ///          <see cref="BytesRefSorter"/> used for re-sorting input for the automaton.
        ///          For large inputs, use on-disk sorting implementations. Per the original
        ///          documentation, the sorter is closed automatically by the build step
        ///          (<c>Build()</c>) if it is closeable; confirm against that method.
        /// </param>
        /// <param name="shareMaxTailLength">
        ///          Max shared suffix sharing length.
        ///
        ///          See the description of this parameter in <c>Builder</c>'s constructor.
        ///          In general, for very large inputs you'll want to construct a non-minimal
        ///          automaton which will be larger, but the construction will take far less RAM.
        ///          For minimal automata, set it to <see cref="int.MaxValue"/>.
        /// </param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        ///          <paramref name="buckets"/> is less than 1 or greater than 255.
        /// </exception>
        /// <exception cref="System.ArgumentNullException">
        ///          <paramref name="sorter"/> is <c>null</c>.
        /// </exception>
        public FSTCompletionBuilder(int buckets, BytesRefSorter sorter, int shareMaxTailLength)
        {
            // Both exception types derive from ArgumentException, so callers that
            // catch ArgumentException keep working; the checks run in the original order.
            if (buckets < 1 || buckets > 255)
            {
                throw new System.ArgumentOutOfRangeException("buckets", "Buckets must be >= 1 and <= 255: " + buckets);
            }

            if (sorter == null)
            {
                throw new System.ArgumentNullException("sorter", "BytesRefSorter must not be null.");
            }

            this.sorter             = sorter;
            this.buckets            = buckets;
            this.shareMaxTailLength = shareMaxTailLength;
        }
        /// <summary>
        /// Builds the final automaton from the entries produced by <paramref name="sorter"/>.
        /// </summary>
        /// <remarks>
        /// Each entry is compared with the last one added and skipped when equal, so
        /// repeated entries collapse to one provided they are adjacent in the iteration
        /// order (presumably sorted; verify against the sorter implementation).
        /// </remarks>
        /// <param name="sorter">Source of the entries to add to the automaton.</param>
        /// <returns>
        /// The finished automaton, or <c>null</c> when the sorter yielded no entries.
        /// </returns>
        private FST<object> BuildAutomaton(BytesRefSorter sorter)
        {
            // No per-entry outputs are stored: every key is added with the shared "no output" value.
            Outputs<object> emptyOutputs = NoOutputs.Singleton;
            object emptyOutput = emptyOutputs.NoOutput;
            Builder<object> automatonBuilder = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength, emptyOutputs, null, false, PackedInts.DEFAULT, true, 15);

            // Holds a copy of the most recently added entry for duplicate detection.
            BytesRef previous = new BytesRef();
            // Scratch buffer reused for each ToIntsRef conversion.
            IntsRef conversionBuffer = new IntsRef();
            int total = 0;
            BytesRefIterator source = sorter.GetEnumerator();

            while (true)
            {
                BytesRef candidate = source.Next();
                if (candidate == null)
                {
                    // Iterator exhausted.
                    break;
                }

                total++;

                bool sameAsPrevious = previous.CompareTo(candidate) == 0;
                if (!sameAsPrevious)
                {
                    automatonBuilder.Add(Util.Fst.Util.ToIntsRef(candidate, conversionBuffer), emptyOutput);
                    previous.CopyBytes(candidate);
                }
            }

            return total != 0 ? automatonBuilder.Finish() : null;
        }
        /// <summary>
        /// Creates an FSTCompletion builder with the specified options.
        /// </summary>
        /// <param name="buckets">
        ///          The number of buckets for weight discretization; must be between 1 and
        ///          255 inclusive. Bucket values supplied when adding entries
        ///          (<c>Add(BytesRef, int)</c>) must be smaller than the number given here.
        /// </param>
        /// <param name="sorter">
        ///          <see cref="BytesRefSorter"/> used for re-sorting input for the automaton.
        ///          For large inputs, use on-disk sorting implementations. Per the original
        ///          documentation, the sorter is closed automatically by the build step
        ///          (<c>Build()</c>) if it is closeable; confirm against that method.
        /// </param>
        /// <param name="shareMaxTailLength">
        ///          Max shared suffix sharing length.
        ///
        ///          See the description of this parameter in <c>Builder</c>'s constructor.
        ///          In general, for very large inputs you'll want to construct a non-minimal
        ///          automaton which will be larger, but the construction will take far less RAM.
        ///          For minimal automata, set it to <see cref="int.MaxValue"/>.
        /// </param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        ///          <paramref name="buckets"/> is less than 1 or greater than 255.
        /// </exception>
        /// <exception cref="System.ArgumentNullException">
        ///          <paramref name="sorter"/> is <c>null</c>.
        /// </exception>
        public FSTCompletionBuilder(int buckets, BytesRefSorter sorter, int shareMaxTailLength)
        {
            // Both exception types derive from ArgumentException, so callers that
            // catch ArgumentException keep working; the checks run in the original order.
            if (buckets < 1 || buckets > 255)
            {
                throw new System.ArgumentOutOfRangeException("buckets", "Buckets must be >= 1 and <= 255: " + buckets);
            }

            if (sorter == null)
            {
                throw new System.ArgumentNullException("sorter", "BytesRefSorter must not be null.");
            }

            this.sorter = sorter;
            this.buckets = buckets;
            this.shareMaxTailLength = shareMaxTailLength;
        }