/// <summary>
/// Builds the final automaton from the entries supplied by <paramref name="sorter"/>.
/// </summary>
/// <param name="sorter">Source of the entries, consumed through its enumerator.</param>
/// <returns>
/// The finished automaton, or <c>null</c> when the sorter produced no entries at all.
/// </returns>
private FST<object> BuildAutomaton(IBytesRefSorter sorter)
{
    // Every input is added with the shared "no output" value of NoOutputs.
    Outputs<object> noOutputs = NoOutputs.Singleton;
    object emptyOutput = noOutputs.NoOutput;
    Builder<object> fstBuilder = new Builder<object>(
        FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
        shareMaxTailLength, noOutputs, null, false,
        PackedInt32s.DEFAULT, true, 15);

    BytesRef lastAdded = new BytesRef();
    Int32sRef intsScratch = new Int32sRef();
    int seen = 0;

    IBytesRefEnumerator entries = sorter.GetEnumerator();
    while (entries.MoveNext())
    {
        BytesRef current = entries.Current;
        seen++;

        // Skip an entry that compares equal to the last one added
        // (or to the initial, empty scratch value).
        if (lastAdded.CompareTo(current) == 0)
        {
            continue;
        }

        fstBuilder.Add(Util.Fst.Util.ToInt32sRef(current, intsScratch), emptyOutput);
        lastAdded.CopyBytes(current);
    }

    if (seen == 0)
    {
        return null;
    }

    return fstBuilder.Finish();
}
/// <summary>
/// Sort a single partition in-memory and write its entries to a temporary file.
/// </summary>
/// <returns>
/// The temporary file that received the partition's entries, written in the order
/// produced by enumerating the buffer with <c>comparer</c>.
/// </returns>
private FileInfo SortPartition(/*int len*/) // LUCENENET NOTE: made private, since protected is not valid in a sealed class. Also eliminated unused parameter.
{
    var data = this.buffer;
    FileInfo tempFile = FileSupport.CreateTempFile("sort", "partition", DefaultTempDir());

    using (var @out = new ByteSequencesWriter(tempFile))
    {
        // Previously the start and end timestamps were taken back-to-back, so SortTime
        // never accumulated anything. Time the creation of the comparer-ordered enumerator
        // instead, using a monotonic Stopwatch (Environment.TickCount is a wrapping 32-bit counter).
        // NOTE(review): assumes the in-memory sort runs when the ordered enumerator is
        // created, as in upstream Lucene - confirm against BytesRefArray.GetEnumerator.
        var sortTimer = System.Diagnostics.Stopwatch.StartNew();
        IBytesRefEnumerator iter = buffer.GetEnumerator(comparer);
        sortTimer.Stop();
        sortInfo.SortTime += sortTimer.ElapsedMilliseconds;

        while (iter.MoveNext())
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(iter.Current.Length <= ushort.MaxValue);
            }
            @out.Write(iter.Current);
        }
    }

    // Clean up the buffer for the next partition.
    data.Clear();
    return tempFile;
}
/// <summary>
/// Fills <paramref name="sorter"/> with 100 random byte sequences, then checks that two
/// enumerators obtained from it yield the same entries and that adding after an
/// enumerator has been requested is rejected.
/// </summary>
/// <param name="sorter">The sorter implementation under test.</param>
private void Check(IBytesRefSorter sorter)
{
    for (int n = 0; n < 100; n++)
    {
        // Random length in [0, 256), random content.
        byte[] payload = new byte[Random.Next(256)];
        Random.NextBytes(payload);
        sorter.Add(new BytesRef(payload));
    }

    // Create two iterators and check that they're aligned with each other.
    IBytesRefEnumerator first = sorter.GetEnumerator();
    IBytesRefEnumerator second = sorter.GetEnumerator();

    // Verify sorter contract: an Add at this point must be refused.
    try
    {
        sorter.Add(new BytesRef(new byte[1]));
        fail("expected contract violation.");
    }
    catch (Exception e) when (e.IsIllegalStateException())
    {
        // Expected.
    }

    while (true)
    {
        // Advance the first enumerator before the second, and stop as soon as either ends.
        if (!first.MoveNext())
        {
            break;
        }
        if (!second.MoveNext())
        {
            break;
        }
        assertEquals(first.Current, second.Current);
    }

    // Neither enumerator may have anything left.
    assertFalse(first.MoveNext());
    assertFalse(second.MoveNext());
}
/// <summary>
/// Enumerates the "contents" field twice: once checking each term ("Jerry", then "Tom"),
/// and once only counting that exactly two terms are returned.
/// </summary>
public void TestFieldContents_1()
{
    try
    {
        indexReader = DirectoryReader.Open(store);

        // First pass: verify the individual terms and that nothing follows them.
        ld = new LuceneDictionary(indexReader, "contents");
        it = ld.GetEntryEnumerator();

        assertTrue("First element doesn't exist.", it.MoveNext());
        string firstTerm = it.Current.Utf8ToString();
        assertTrue("First element isn't correct", firstTerm.Equals("Jerry", StringComparison.Ordinal));

        assertTrue("Second element doesn't exist.", it.MoveNext());
        string secondTerm = it.Current.Utf8ToString();
        assertTrue("Second element isn't correct", secondTerm.Equals("Tom", StringComparison.Ordinal));

        assertFalse("More elements than expected", it.MoveNext());

        // Second pass: a fresh dictionary and enumerator must yield exactly two terms.
        ld = new LuceneDictionary(indexReader, "contents");
        it = ld.GetEntryEnumerator();

        int termsSeen = 0;
        while (it.MoveNext())
        {
            termsSeen++;
        }
        assertTrue("Number of words incorrect", termsSeen == 2);
    }
    finally
    {
        indexReader?.Dispose();
    }
}
/// <summary>
/// Sort a single partition in-memory and write its entries to a temporary file.
/// </summary>
/// <returns>
/// The temporary file that received the partition's entries, written in the order
/// produced by enumerating the buffer with <c>comparer</c>.
/// </returns>
private FileInfo SortPartition(/*int len*/) // LUCENENET NOTE: made private, since protected is not valid in a sealed class. Also eliminated unused parameter.
{
    var data = this.buffer;
    FileInfo tempFile = FileSupport.CreateTempFile("sort", "partition", DefaultTempDir());

    using (var @out = new ByteSequencesWriter(tempFile))
    {
        // Previously the start and end timestamps were taken back-to-back, so SortTime
        // never accumulated anything. Bracket the creation of the comparer-ordered
        // enumerator instead.
        // NOTE(review): assumes the in-memory sort runs when the ordered enumerator is
        // created, as in upstream Lucene - confirm against BytesRefArray.GetEnumerator.
        long start = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
        IBytesRefEnumerator iter = buffer.GetEnumerator(comparer);
        sortInfo.SortTime += ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - start); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

        while (iter.MoveNext())
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(iter.Current.Length <= ushort.MaxValue);
            }
            @out.Write(iter.Current);
        }
    }

    // Clean up the buffer for the next partition.
    data.Clear();
    return tempFile;
}
/// <summary>
/// Verifies that a <c>HighFrequencyDictionary</c> built over an index holding only an
/// empty commit yields an enumerator with a <c>null</c> comparer and no entries.
/// </summary>
public void TestEmpty()
{
    // Both the directory and the reader are disposed even when an assertion fails;
    // the reader was previously never disposed at all.
    using (Directory dir = NewDirectory())
    {
        IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
        writer.Commit();
        writer.Dispose();

        using (IndexReader ir = DirectoryReader.Open(dir))
        {
            IDictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
            IBytesRefEnumerator tf = dictionary.GetEntryEnumerator();
            assertNull(tf.Comparer);
            assertFalse(tf.MoveNext());
        }
    }
}
/// <summary>
/// Appends random strings to a <c>BytesRefArray</c> and verifies that they can be read
/// back by index (sequentially and at random positions) and through the plain enumerator,
/// in insertion order. Runs two rounds; the second round may start from a cleared list.
/// </summary>
public virtual void TestAppendIterator()
{
    Random random = Random;
    BytesRefArray list = new BytesRefArray(Util.Counter.NewCounter());
    IList<string> stringList = new JCG.List<string>();
    for (int j = 0; j < 2; j++)
    {
        if (j > 0 && random.NextBoolean())
        {
            list.Clear();
            stringList.Clear();
        }

        int entries = AtLeast(500);
        BytesRef spare = new BytesRef();
        int initSize = list.Length;
        for (int i = 0; i < entries; i++)
        {
            string randomRealisticUnicodeString = TestUtil.RandomRealisticUnicodeString(random);
            spare.CopyChars(randomRealisticUnicodeString);
            // Append is expected to return the index of the newly added entry.
            Assert.AreEqual(i + initSize, list.Append(spare));
            stringList.Add(randomRealisticUnicodeString);
        }

        for (int i = 0; i < entries; i++)
        {
            Assert.IsNotNull(list.Get(spare, i));
            Assert.AreEqual(stringList[i], spare.Utf8ToString(), "entry " + i + " doesn't match");
        }

        // check random
        for (int i = 0; i < entries; i++)
        {
            int e = random.Next(entries);
            Assert.IsNotNull(list.Get(spare, e));
            // Report the index that was actually checked (e), not the loop counter.
            Assert.AreEqual(stringList[e], spare.Utf8ToString(), "entry " + e + " doesn't match");
        }

        // Enumerate twice to make sure a fresh enumerator always starts from the beginning.
        for (int i = 0; i < 2; i++)
        {
            IBytesRefEnumerator iterator = list.GetEnumerator();
            foreach (string @string in stringList)
            {
                Assert.IsTrue(iterator.MoveNext());
                Assert.AreEqual(@string, iterator.Current.Utf8ToString());
            }
            // The enumerator must not yield more entries than were appended.
            Assert.IsFalse(iterator.MoveNext());
        }
    }
}
/// <summary>
/// A dictionary over a field name that is not in the index must enumerate no entries.
/// </summary>
public void TestFieldNonExistent()
{
    try
    {
        indexReader = DirectoryReader.Open(store);

        ld = new LuceneDictionary(indexReader, "nonexistent_field");
        it = ld.GetEntryEnumerator();

        bool hasEntry = it.MoveNext();
        assertFalse("More elements than expected", hasEntry);
    }
    finally
    {
        indexReader?.Dispose();
    }
}
/// <summary>
/// The "aaa" field must enumerate exactly one term, "foo".
/// </summary>
public void TestFieldAaa()
{
    try
    {
        indexReader = DirectoryReader.Open(store);

        ld = new LuceneDictionary(indexReader, "aaa");
        it = ld.GetEntryEnumerator();

        assertTrue("First element doesn't exist.", it.MoveNext());
        string onlyTerm = it.Current.Utf8ToString();
        assertTrue("First element isn't correct", onlyTerm.Equals("foo", StringComparison.Ordinal));

        // Nothing may follow the single expected term.
        assertFalse("More elements than expected", it.MoveNext());
    }
    finally
    {
        indexReader?.Dispose();
    }
}
/// <summary>
/// Appends random strings to a <c>BytesRefArray</c> and verifies that enumerating it with
/// <c>BytesRef.UTF8SortedAsUTF16Comparer</c> yields the same order as sorting the strings
/// themselves. Runs two rounds; the second round may start from a cleared list.
/// </summary>
public virtual void TestSort()
{
    Random rnd = Random;
    BytesRefArray array = new BytesRefArray(Util.Counter.NewCounter());
    IList<string> expected = new JCG.List<string>();

    for (int round = 0; round < 2; round++)
    {
        if (round > 0 && rnd.NextBoolean())
        {
            array.Clear();
            expected.Clear();
        }

        int entries = AtLeast(500);
        BytesRef scratch = new BytesRef();
        int sizeBefore = array.Length;
        for (int i = 0; i < entries; i++)
        {
            string value = TestUtil.RandomRealisticUnicodeString(rnd);
            scratch.CopyChars(value);
            // Append is expected to return the index of the newly added entry.
            Assert.AreEqual(sizeBefore + i, array.Append(scratch));
            expected.Add(value);
        }

        // LUCENENET NOTE: the expected order must not take culture into consideration,
        // so that it matches the sort order of BytesRef.UTF8SortedAsUTF16Comparer.
        // NOTE(review): presumably CollectionUtil.TimSort falls back to the natural
        // (culture-insensitive) comparer when none is passed - confirm.
        CollectionUtil.TimSort(expected);

#pragma warning disable 612, 618
        IBytesRefEnumerator sorted = array.GetEnumerator(BytesRef.UTF8SortedAsUTF16Comparer);
#pragma warning restore 612, 618

        int position = 0;
        for (; sorted.MoveNext(); position++)
        {
            Assert.AreEqual(expected[position], sorted.Current.Utf8ToString(), "entry " + position + " doesn't match");
        }

        // The enumerator must be exhausted and must have covered every expected string.
        Assert.IsFalse(sorted.MoveNext());
        Assert.AreEqual(position, expected.Count);
    }
}
/// <summary>
/// Walks the "contents" field and checks that it yields "Jerry", then "Tom", then ends.
/// </summary>
public void TestFieldContents_2()
{
    try
    {
        indexReader = DirectoryReader.Open(store);

        ld = new LuceneDictionary(indexReader, "contents");
        it = ld.GetEntryEnumerator();

        // just iterate through words
        assertTrue(it.MoveNext());
        string firstTerm = it.Current.Utf8ToString();
        assertEquals("First element isn't correct", "Jerry", firstTerm);

        assertTrue(it.MoveNext());
        string secondTerm = it.Current.Utf8ToString();
        assertEquals("Second element isn't correct", "Tom", secondTerm);

        assertFalse("Nonexistent element is really null", it.MoveNext());
    }
    finally
    {
        indexReader?.Dispose();
    }
}
/// <summary>
/// Creates a new wrapper, wrapping the specified iterator and
/// specifying a weight value of <c>1</c> for all terms
/// and nullifies associated payloads.
/// </summary>
/// <param name="wrapped">The enumerator to wrap. Must not be <c>null</c>.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="wrapped"/> is <c>null</c>.</exception>
public InputEnumeratorWrapper(IBytesRefEnumerator wrapped)
{
    // Fail fast here instead of storing null and failing later on first use.
    if (wrapped == null)
    {
        throw new System.ArgumentNullException(nameof(wrapped));
    }

    this.wrapped = wrapped;
}
/// <summary>
/// Creates a new wrapper, wrapping the specified iterator and
/// specifying a weight value of <c>1</c> for all terms.
/// </summary>
/// <param name="wrapped">The enumerator to wrap. Must not be <c>null</c>.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="wrapped"/> is <c>null</c>.</exception>
public TermFreqEnumeratorWrapper(IBytesRefEnumerator wrapped)
{
    // Fail fast here instead of storing null and failing later on first use.
    if (wrapped == null)
    {
        throw new System.ArgumentNullException(nameof(wrapped));
    }

    this.wrapped = wrapped;
}