/// <summary>
        /// Feeds the entries enumerated by <paramref name="sorter"/> into an FST builder and
        /// returns the builder's result. An entry that compares equal to the most recently
        /// added entry is counted but not added again.
        /// </summary>
        /// <param name="sorter">Source of the entries to add to the automaton.</param>
        /// <returns>
        /// <c>null</c> when the sorter enumerated no entries at all; otherwise whatever
        /// <c>Finish()</c> on the builder returns.
        /// </returns>
        private FST <object> BuildAutomaton(IBytesRefSorter sorter)
        {
            Outputs <object> noOutputs = NoOutputs.Singleton;
            object emptyOutput = noOutputs.NoOutput;
            Builder <object> fstBuilder = new Builder <object>(
                FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength,
                noOutputs, null, false, PackedInt32s.DEFAULT, true, 15);

            // Copy of the most recently added entry; it starts out as an empty BytesRef.
            BytesRef lastAdded = new BytesRef();
            Int32sRef intsScratch = new Int32sRef();
            int seen = 0;

            // The counter advances for every enumerated entry, including skipped ones.
            for (IBytesRefEnumerator entries = sorter.GetEnumerator(); entries.MoveNext(); seen++)
            {
                BytesRef current = entries.Current;
                if (lastAdded.CompareTo(current) == 0)
                {
                    // Equal to the last added entry (or to the initial empty scratch): skip it.
                    continue;
                }

                fstBuilder.Add(Util.Fst.Util.ToInt32sRef(current, intsScratch), emptyOutput);
                lastAdded.CopyBytes(current);
            }

            if (seen == 0)
            {
                return null;
            }

            return fstBuilder.Finish();
        }
Example #2
0
        /// <summary>
        /// Writes the current in-memory buffer, enumerated with <c>comparer</c>, to a new
        /// temporary partition file and then clears the buffer.
        /// </summary>
        /// <returns>The temporary file the buffer's entries were written to.</returns>
        private FileInfo SortPartition(/*int len*/) // LUCENENET NOTE: made private, since protected is not valid in a sealed class. Also eliminated unused parameter.
        {
            var      data     = this.buffer;
            FileInfo tempFile = FileSupport.CreateTempFile("sort", "partition", DefaultTempDir());

            // Measure the time spent obtaining the comparer-ordered enumerator. Previously both
            // timestamps were taken back-to-back with no work in between, so SortTime never grew.
            // NOTE(review): this assumes the ordering work is done when the enumerator is
            // created rather than lazily on the first MoveNext() - confirm against BytesRefArray.
            long start = Environment.TickCount;
            IBytesRefEnumerator iter = data.GetEnumerator(comparer);
            sortInfo.SortTime += (Environment.TickCount - start);

            using (var @out = new ByteSequencesWriter(tempFile))
            {
                while (iter.MoveNext())
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(iter.Current.Length <= ushort.MaxValue);
                    }
                    @out.Write(iter.Current);
                }
            }

            // Clean up the buffer for the next partition.
            data.Clear();
            return tempFile;
        }
Example #3
0
        /// <summary>
        /// Exercises an <see cref="IBytesRefSorter"/>: adds 100 random byte arrays (lengths
        /// drawn from <c>Random.Next(256)</c>), verifies that adding after enumerators have
        /// been obtained is rejected with an illegal-state exception, and verifies that two
        /// enumerators obtained from the same sorter yield equal entries in lock-step and end
        /// at the same time.
        /// </summary>
        /// <param name="sorter">The sorter implementation under test.</param>
        private void Check(IBytesRefSorter sorter)
        {
            for (int i = 0; i < 100; i++)
            {
                byte[] current = new byte[Random.Next(256)];
                Random.NextBytes(current);
                sorter.Add(new BytesRef(current));
            }

            // Create two iterators and check that they're aligned with each other.
            IBytesRefEnumerator i1 = sorter.GetEnumerator();
            IBytesRefEnumerator i2 = sorter.GetEnumerator();

            // Verify sorter contract.
            try
            {
                sorter.Add(new BytesRef(new byte[1]));
                fail("expected contract violation.");
            }
            catch (Exception e) when (e.IsIllegalStateException())
            {
                // Expected.
            }

            // Drive the loop from i1 alone and require i2 to advance each time. A combined
            // "i1.MoveNext() && i2.MoveNext()" condition exits silently when i2 ends first,
            // which lets a one-element length mismatch go unnoticed.
            while (i1.MoveNext())
            {
                if (!i2.MoveNext())
                {
                    fail("second enumerator ended before the first.");
                }
                assertEquals(i1.Current, i2.Current);
            }

            // i1 is exhausted at this point; i2 must be exhausted as well.
            assertFalse(i2.MoveNext());
        }
Example #4
0
        /// <summary>
        /// Opens the index, checks that the "contents" field's dictionary enumerates exactly
        /// "Jerry" followed by "Tom", and then checks that a second, fresh enumerator yields
        /// exactly two entries.
        /// </summary>
        public void TestFieldContents_1()
        {
            try
            {
                indexReader = DirectoryReader.Open(store);

                // First pass: check each term individually.
                ld = new LuceneDictionary(indexReader, "contents");
                it = ld.GetEntryEnumerator();

                assertTrue("First element doesn't exist.", it.MoveNext());
                string firstTerm = it.Current.Utf8ToString();
                assertTrue("First element isn't correct", firstTerm.Equals("Jerry", StringComparison.Ordinal));

                assertTrue("Second element doesn't exist.", it.MoveNext());
                string secondTerm = it.Current.Utf8ToString();
                assertTrue("Second element isn't correct", secondTerm.Equals("Tom", StringComparison.Ordinal));

                assertFalse("More elements than expected", it.MoveNext());

                // Second pass: only count the terms of a fresh enumerator.
                ld = new LuceneDictionary(indexReader, "contents");
                it = ld.GetEntryEnumerator();

                int seen = 0;
                while (it.MoveNext())
                {
                    seen++;
                }

                assertTrue("Number of words incorrect", seen == 2);
            }
            finally
            {
                indexReader?.Dispose();
            }
        }
Example #5
0
        /// <summary>
        /// Writes the current in-memory buffer, enumerated with <c>comparer</c>, to a new
        /// temporary partition file and then clears the buffer.
        /// </summary>
        /// <returns>The temporary file the buffer's entries were written to.</returns>
        private FileInfo SortPartition(/*int len*/) // LUCENENET NOTE: made private, since protected is not valid in a sealed class. Also eliminated unused parameter.
        {
            var      data     = this.buffer;
            FileInfo tempFile = FileSupport.CreateTempFile("sort", "partition", DefaultTempDir());

            // Measure the time spent obtaining the comparer-ordered enumerator. Previously both
            // timestamps were taken back-to-back with no work in between, so SortTime never grew.
            // NOTE(review): this assumes the ordering work is done when the enumerator is
            // created rather than lazily on the first MoveNext() - confirm against BytesRefArray.
            long start = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            IBytesRefEnumerator iter = data.GetEnumerator(comparer);
            sortInfo.SortTime += ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - start); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            using (var @out = new ByteSequencesWriter(tempFile))
            {
                while (iter.MoveNext())
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(iter.Current.Length <= ushort.MaxValue);
                    }
                    @out.Write(iter.Current);
                }
            }

            // Clean up the buffer for the next partition.
            data.Clear();
            return tempFile;
        }
        /// <summary>
        /// Checks that a <c>HighFrequencyDictionary</c> built over an index that received only
        /// an empty commit, for the field "bogus", returns an enumerator whose
        /// <c>Comparer</c> is <c>null</c> and which has no entries.
        /// </summary>
        public void TestEmpty()
        {
            // Scope the directory and the reader so both are released even when an assertion
            // fails; the reader was previously never disposed.
            using (Directory dir = NewDirectory())
            {
                IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

                writer.Commit();
                writer.Dispose();

                // The inner using guarantees the reader is disposed before the directory.
                using (IndexReader ir = DirectoryReader.Open(dir))
                {
                    IDictionary         dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
                    IBytesRefEnumerator tf         = dictionary.GetEntryEnumerator();

                    assertNull(tf.Comparer);
                    assertFalse(tf.MoveNext());
                }
            }
        }
Example #7
0
        /// <summary>
        /// Appends random strings to a <see cref="BytesRefArray"/> over two rounds (randomly
        /// clearing the collections before the second round) and checks that the entries can
        /// be read back by sequential index, by random index, and through the enumerator, in
        /// append order.
        /// </summary>
        public virtual void TestAppendIterator()
        {
            Random         random     = Random;
            BytesRefArray  list       = new BytesRefArray(Util.Counter.NewCounter());
            IList <string> stringList = new JCG.List <string>();

            for (int j = 0; j < 2; j++)
            {
                if (j > 0 && random.NextBoolean())
                {
                    list.Clear();
                    stringList.Clear();
                }
                int      entries  = AtLeast(500);
                BytesRef spare    = new BytesRef();
                int      initSize = list.Length;
                for (int i = 0; i < entries; i++)
                {
                    string randomRealisticUnicodeString = TestUtil.RandomRealisticUnicodeString(random);
                    spare.CopyChars(randomRealisticUnicodeString);
                    // Append is expected to return the index of the new entry.
                    Assert.AreEqual(i + initSize, list.Append(spare));
                    stringList.Add(randomRealisticUnicodeString);
                }

                // check sequential
                for (int i = 0; i < entries; i++)
                {
                    Assert.IsNotNull(list.Get(spare, i));
                    Assert.AreEqual(stringList[i], spare.Utf8ToString(), "entry " + i + " doesn't match");
                }

                // check random
                for (int i = 0; i < entries; i++)
                {
                    int e = random.Next(entries);
                    Assert.IsNotNull(list.Get(spare, e));
                    // Report the index that was actually checked (e), not the loop counter.
                    Assert.AreEqual(stringList[e], spare.Utf8ToString(), "entry " + e + " doesn't match");
                }

                // check the enumerator, twice, to make sure a fresh one starts from the beginning
                for (int i = 0; i < 2; i++)
                {
                    IBytesRefEnumerator iterator = list.GetEnumerator();
                    foreach (string @string in stringList)
                    {
                        Assert.IsTrue(iterator.MoveNext());
                        Assert.AreEqual(@string, iterator.Current.Utf8ToString());
                    }
                    // The enumerator must not yield more entries than were appended.
                    Assert.IsFalse(iterator.MoveNext());
                }
            }
        }
Example #8
0
        /// <summary>
        /// Checks that the dictionary for the field "nonexistent_field" enumerates no entries.
        /// </summary>
        public void TestFieldNonExistent()
        {
            try
            {
                indexReader = DirectoryReader.Open(store);
                ld = new LuceneDictionary(indexReader, "nonexistent_field");
                it = ld.GetEntryEnumerator();

                bool hasEntry = it.MoveNext();
                assertFalse("More elements than expected", hasEntry);
            }
            finally
            {
                // Release the reader if one was assigned.
                indexReader?.Dispose();
            }
        }
Example #9
0
        /// <summary>
        /// Checks that the dictionary for the field "aaa" enumerates exactly one entry, "foo".
        /// </summary>
        public void TestFieldAaa()
        {
            try
            {
                indexReader = DirectoryReader.Open(store);
                ld = new LuceneDictionary(indexReader, "aaa");
                it = ld.GetEntryEnumerator();

                assertTrue("First element doesn't exist.", it.MoveNext());

                string onlyTerm = it.Current.Utf8ToString();
                assertTrue("First element isn't correct", onlyTerm.Equals("foo", StringComparison.Ordinal));

                assertFalse("More elements than expected", it.MoveNext());
            }
            finally
            {
                // Release the reader if one was assigned.
                indexReader?.Dispose();
            }
        }
Example #10
0
        /// <summary>
        /// Appends random strings to a <see cref="BytesRefArray"/> over two rounds (randomly
        /// clearing the collections before the second round), sorts the expected strings, and
        /// checks that the array's comparer-ordered enumerator yields the same sequence.
        /// </summary>
        public virtual void TestSort()
        {
            Random rnd = Random;
            BytesRefArray bytesList = new BytesRefArray(Util.Counter.NewCounter());
            IList <string> expected = new JCG.List <string>();

            for (int round = 0; round < 2; round++)
            {
                // From the second round on, randomly decide whether to start over from empty collections.
                if (round > 0 && rnd.NextBoolean())
                {
                    bytesList.Clear();
                    expected.Clear();
                }

                int entryCount = AtLeast(500);
                int offset = bytesList.Length;
                BytesRef scratch = new BytesRef();
                for (int i = 0; i < entryCount; i++)
                {
                    string text = TestUtil.RandomRealisticUnicodeString(rnd);
                    scratch.CopyChars(text);
                    // Append is expected to return the index of the new entry.
                    Assert.AreEqual(offset + i, bytesList.Append(scratch));
                    expected.Add(text);
                }

                // LUCENENET NOTE: The expected strings must be sorted without taking culture
                // into consideration so the order matches BytesRef.UTF8SortedAsUTF16Comparer.
                // NOTE(review): the original note names ArrayUtil.GetNaturalComparator<T>() but
                // no comparer is passed here - presumably that is TimSort's default; confirm.
                CollectionUtil.TimSort(expected);
#pragma warning disable 612, 618
                IBytesRefEnumerator sorted = bytesList.GetEnumerator(BytesRef.UTF8SortedAsUTF16Comparer);
#pragma warning restore 612, 618

                int position;
                for (position = 0; sorted.MoveNext(); position++)
                {
                    Assert.AreEqual(expected[position], sorted.Current.Utf8ToString(), "entry " + position + " doesn't match");
                }

                // The enumerator must stay exhausted and must have covered every expected string.
                Assert.IsFalse(sorted.MoveNext());
                Assert.AreEqual(position, expected.Count);
            }
        }
Example #11
0
        /// <summary>
        /// Walks the dictionary of the "contents" field and checks that it enumerates
        /// "Jerry", then "Tom", and nothing further.
        /// </summary>
        public void TestFieldContents_2()
        {
            try
            {
                indexReader = DirectoryReader.Open(store);
                ld = new LuceneDictionary(indexReader, "contents");
                it = ld.GetEntryEnumerator();

                // Step through the terms one at a time.
                assertTrue(it.MoveNext());
                string firstTerm = it.Current.Utf8ToString();
                assertEquals("First element isn't correct", "Jerry", firstTerm);

                assertTrue(it.MoveNext());
                string secondTerm = it.Current.Utf8ToString();
                assertEquals("Second element isn't correct", "Tom", secondTerm);

                assertFalse("Nonexistent element is really null", it.MoveNext());
            }
            finally
            {
                // Release the reader if one was assigned.
                indexReader?.Dispose();
            }
        }
Example #12
0
 /// <summary>
 /// Creates a new wrapper around the specified enumerator. The wrapper is
 /// documented as specifying a weight value of <c>1</c> for all terms
 /// and nullifying associated payloads; that behavior lives outside this constructor.
 /// </summary>
 /// <param name="wrapped">The enumerator to wrap; it is stored in the <c>wrapped</c> field as-is.</param>
 public InputEnumeratorWrapper(IBytesRefEnumerator wrapped)
 {
     this.wrapped = wrapped;
 }
Example #13
0
 /// <summary>
 /// Creates a new wrapper around the specified enumerator. The wrapper is
 /// documented as specifying a weight value of <c>1</c> for all terms;
 /// that behavior lives outside this constructor.
 /// </summary>
 /// <param name="wrapped">The enumerator to wrap; it is stored in the <c>wrapped</c> field as-is.</param>
 public TermFreqEnumeratorWrapper(IBytesRefEnumerator wrapped)
 {
     this.wrapped = wrapped;
 }