/// <summary>
        /// Feeds the entries produced by the iterator of <paramref name="sorter"/> into an
        /// FST builder and returns the finished automaton.
        /// </summary>
        /// <remarks>
        /// An entry is added only when it differs from the most recently added one, so a run
        /// of equal consecutive entries contributes a single path. The comparison buffer starts
        /// out as a freshly constructed <see cref="BytesRef"/>; entries that compare equal to it
        /// before anything has been added are skipped as well.
        /// </remarks>
        /// <param name="sorter">Source of the entries; only its iterator is consumed here.</param>
        /// <returns>
        /// <c>null</c> when the iterator yields no entries at all; otherwise the result of
        /// finishing the builder.
        /// </returns>
        private FST <object> BuildAutomaton(IBytesRefSorter sorter)
        {
            // The automaton carries no per-entry payload, so every path gets the shared "no output" value.
            Outputs<object> noOutputs = NoOutputs.Singleton;
            object noOutput = noOutputs.NoOutput;
            Builder<object> fstBuilder = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength, noOutputs, null, false, PackedInt32s.DEFAULT, true, 15);

            BytesRef lastAdded = new BytesRef();
            Int32sRef intsScratch = new Int32sRef();
            int seen = 0;
            IBytesRefIterator entries = sorter.GetIterator();

            for (BytesRef current = entries.Next(); current != null; current = entries.Next())
            {
                seen++;

                // Same bytes as the previously added entry: nothing new to insert.
                if (lastAdded.CompareTo(current) == 0)
                {
                    continue;
                }

                fstBuilder.Add(Util.Fst.Util.ToInt32sRef(current, intsScratch), noOutput);
                lastAdded.CopyBytes(current);
            }

            if (seen == 0)
            {
                return null;
            }

            return fstBuilder.Finish();
        }
Exemple #2
0
        /// <summary>
        /// Exercises a sorter: adds 100 random byte arrays (each of random length below 256),
        /// obtains two enumerators, verifies that a further <c>Add</c> is rejected, and then
        /// checks that both enumerators yield equal elements and finish together.
        /// </summary>
        /// <param name="sorter">The sorter implementation under test.</param>
        private void Check(IBytesRefSorter sorter)
        {
            const int entryCount = 100;
            for (int added = 0; added < entryCount; added++)
            {
                byte[] payload = new byte[Random.Next(256)];
                Random.NextBytes(payload);
                sorter.Add(new BytesRef(payload));
            }

            // Two independent enumerators over the same sorter; they are compared element by element below.
            IBytesRefEnumerator first = sorter.GetEnumerator();
            IBytesRefEnumerator second = sorter.GetEnumerator();

            // Sorter contract: adding after enumerators were requested must fail.
            try
            {
                sorter.Add(new BytesRef(new byte[1]));
                fail("expected contract violation.");
            }
            catch (Exception e) when(e.IsIllegalStateException())
            {
                // This is the outcome the test expects.
            }

            // Advance both in lock-step; the second is only advanced when the first still has an element.
            while (true)
            {
                if (!first.MoveNext())
                {
                    break;
                }
                if (!second.MoveNext())
                {
                    break;
                }
                assertEquals(first.Current, second.Current);
            }

            // Both enumerators must now be exhausted.
            assertFalse(first.MoveNext());
            assertFalse(second.MoveNext());
        }
Exemple #3
0
        /// <summary>
        /// Exercises a sorter: adds 100 random byte arrays (each of random length below 256),
        /// obtains two iterators, verifies that a further <c>Add</c> throws
        /// <see cref="InvalidOperationException"/>, and then checks that both iterators return
        /// equal entries and run out together.
        /// </summary>
        /// <param name="sorter">The sorter implementation under test.</param>
        private void Check(IBytesRefSorter sorter)
        {
            const int entryCount = 100;
            for (int added = 0; added < entryCount; added++)
            {
                byte[] payload = new byte[Random.nextInt(256)];
                Random.NextBytes(payload);
                sorter.Add(new BytesRef(payload));
            }

            // Two independent iterators over the same sorter; they are compared entry by entry below.
            IBytesRefIterator first = sorter.GetIterator();
            IBytesRefIterator second = sorter.GetIterator();

            // Sorter contract: adding after iterators were requested must fail.
            try
            {
                sorter.Add(new BytesRef(new byte[1]));
                fail("expected contract violation.");
            }
            catch (InvalidOperationException)
            {
                // This is the outcome the test expects.
            }

            // Advance both in lock-step; the second is only advanced when the first returned an entry.
            while (true)
            {
                BytesRef fromFirst = first.Next();
                if (fromFirst == null)
                {
                    break;
                }

                BytesRef fromSecond = second.Next();
                if (fromSecond == null)
                {
                    break;
                }

                assertEquals(fromFirst, fromSecond);
            }

            // Both iterators must now report the end of the data.
            assertNull(first.Next());
            assertNull(second.Next());
        }
Exemple #4
0
        /// <summary>
        /// Initializes a builder for an FSTCompletion using the given options.
        /// </summary>
        /// <param name="buckets">
        ///          Number of buckets used for weight discretization; must lie in the range 1..255.
        ///          Bucket values passed to <see cref="Add(BytesRef, int)"/> must be smaller than
        ///          this number.
        /// </param>
        /// <param name="sorter">
        ///          The <see cref="IBytesRefSorter"/> that re-sorts the input for the automaton.
        ///          On-disk sorting implementations are recommended for large inputs. If the
        ///          sorter implements <see cref="IDisposable"/>, it is closed automatically in
        ///          <see cref="Build()"/>.
        /// </param>
        /// <param name="shareMaxTailLength">
        ///          Maximum length of shared suffixes.
        ///
        ///          The parameter is described on the constructor of <see cref="Builder"/>.
        ///          For very large inputs a non-minimal automaton is usually preferable: it is
        ///          larger, but building it takes far less ram. Pass <see cref="int.MaxValue"/>
        ///          to obtain a minimal automaton. </param>
        /// <exception cref="ArgumentOutOfRangeException">
        ///          <paramref name="buckets"/> is less than 1 or greater than 255. </exception>
        /// <exception cref="ArgumentNullException">
        ///          <paramref name="sorter"/> is <c>null</c>. </exception>
        public FSTCompletionBuilder(int buckets, IBytesRefSorter sorter, int shareMaxTailLength)
        {
            // LUCENENET specific - ArgumentOutOfRangeException is used instead of IllegalArgumentException (.NET convention)
            bool bucketsInRange = buckets >= 1 && buckets <= 255;
            if (!bucketsInRange)
            {
                throw new ArgumentOutOfRangeException(nameof(buckets), buckets, "Buckets must be >= 1 and <= 255");
            }

            // LUCENENET specific - ArgumentNullException is used instead of IllegalArgumentException (.NET convention)
            if (sorter is null)
            {
                throw new ArgumentNullException(nameof(sorter), "BytesRefSorter must not be null.");
            }

            this.sorter = sorter;
            this.buckets = buckets;
            this.shareMaxTailLength = shareMaxTailLength;
        }
Exemple #5
0
        /// <summary>
        /// Creates an FSTCompletion with the specified options. </summary>
        /// <param name="buckets">
        ///          The number of buckets for weight discretization; must be between 1 and 255
        ///          inclusive. Buckets are used in <see cref="Add(BytesRef, int)"/> and must be
        ///          smaller than the number given here.
        /// </param>
        /// <param name="sorter">
        ///          <see cref="IBytesRefSorter"/> used for re-sorting input for the automaton.
        ///          For large inputs, use on-disk sorting implementations. The sorter
        ///          is closed automatically in <see cref="Build()"/> if it implements
        ///          <see cref="IDisposable"/>.
        /// </param>
        /// <param name="shareMaxTailLength">
        ///          Max shared suffix sharing length.
        ///
        ///          See the description of this parameter in <see cref="Builder"/>'s constructor.
        ///          In general, for very large inputs you'll want to construct a non-minimal
        ///          automaton which will be larger, but the construction will take far less ram.
        ///          For minimal automata, set it to <see cref="int.MaxValue"/>. </param>
        /// <exception cref="ArgumentOutOfRangeException">
        ///          <paramref name="buckets"/> is less than 1 or greater than 255. </exception>
        /// <exception cref="ArgumentNullException">
        ///          <paramref name="sorter"/> is <c>null</c>. </exception>
        public FSTCompletionBuilder(int buckets, IBytesRefSorter sorter, int shareMaxTailLength)
        {
            if (buckets < 1 || buckets > 255)
            {
                throw new ArgumentOutOfRangeException(nameof(buckets), buckets, "Buckets must be >= 1 and <= 255");
            }

            // The single-string ArgumentNullException constructor interprets its argument as the
            // parameter name, so the (paramName, message) overload is required to report both correctly.
            this.sorter             = sorter ?? throw new ArgumentNullException(nameof(sorter), "BytesRefSorter must not be null.");
            this.buckets            = buckets;
            this.shareMaxTailLength = shareMaxTailLength;
        }
        /// <summary>
        /// Creates an FSTCompletion with the specified options. </summary>
        /// <param name="buckets">
        ///          The number of buckets for weight discretization; must be between 1 and 255
        ///          inclusive. Buckets are used in <see cref="Add(BytesRef, int)"/> and must be
        ///          smaller than the number given here.
        /// </param>
        /// <param name="sorter">
        ///          <see cref="IBytesRefSorter"/> used for re-sorting input for the automaton.
        ///          For large inputs, use on-disk sorting implementations. The sorter
        ///          is closed automatically in <see cref="Build()"/> if it implements
        ///          <see cref="IDisposable"/>.
        /// </param>
        /// <param name="shareMaxTailLength">
        ///          Max shared suffix sharing length.
        ///
        ///          See the description of this parameter in <see cref="Builder"/>'s constructor.
        ///          In general, for very large inputs you'll want to construct a non-minimal
        ///          automaton which will be larger, but the construction will take far less ram.
        ///          For minimal automata, set it to <see cref="int.MaxValue"/>. </param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        ///          <paramref name="buckets"/> is less than 1 or greater than 255. </exception>
        /// <exception cref="System.ArgumentNullException">
        ///          <paramref name="sorter"/> is <c>null</c>. </exception>
        public FSTCompletionBuilder(int buckets, IBytesRefSorter sorter, int shareMaxTailLength)
        {
            // Both exception types below derive from System.ArgumentException, so callers that
            // catch ArgumentException keep working while gaining the offending parameter name.
            if (buckets < 1 || buckets > 255)
            {
                throw new System.ArgumentOutOfRangeException(nameof(buckets), buckets, "Buckets must be >= 1 and <= 255");
            }

            if (sorter == null)
            {
                throw new System.ArgumentNullException(nameof(sorter), "BytesRefSorter must not be null.");
            }

            this.sorter             = sorter;
            this.buckets            = buckets;
            this.shareMaxTailLength = shareMaxTailLength;
        }