public void BeforeClass() { Dir = NewDirectory(); Sdir1 = NewDirectory(); Sdir2 = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone); RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone); RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone); for (int i = 0; i < 10; i++) { Document doc = new Document(); doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO)); writer.AddDocument(doc); ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc); } writer.ForceMerge(1); swriter1.ForceMerge(1); swriter2.ForceMerge(1); writer.Dispose(); swriter1.Dispose(); swriter2.Dispose(); Reader = DirectoryReader.Open(Dir); Searcher = NewSearcher(Reader); MultiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true); MultiSearcher = NewSearcher(MultiReader); MultiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true); MultiSearcherDupls = NewSearcher(MultiReaderDupls); }
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy());
    var iw = new RandomIndexWriter(Random(), dir, iwc);
    var doc = new Document
    {
        NewStringField("id", "1", Field.Store.YES),
        NewTextField("body", "some contents and more contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 5)
    };
    iw.AddDocument(doc);
    doc = new Document
    {
        NewStringField("id", "2", Field.Store.YES),
        NewTextField("body", "another document with different contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 20)
    };
    iw.AddDocument(doc);
    doc = new Document
    {
        NewStringField("id", "3", Field.Store.YES),
        NewTextField("body", "crappy contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 2)
    };
    iw.AddDocument(doc);
    iw.ForceMerge(1);
    reader = iw.Reader;
    iw.Dispose();
}
public void TestFieldNotPresent() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); int num = AtLeast(3); int skip = Random().Next(num); var terms = new List<Term>(); for (int i = 0; i < num; i++) { terms.Add(new Term("field" + i, "content1")); Document doc = new Document(); if (skip == i) { continue; } doc.Add(NewStringField("field" + i, "content1", Field.Store.YES)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader reader = w.Reader; w.Dispose(); assertEquals(1, reader.Leaves.size()); AtomicReaderContext context = reader.Leaves.First(); TermsFilter tf = new TermsFilter(terms); FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs); assertEquals("Must be num fields - 1 since we skip only one field", num - 1, bits.Cardinality()); reader.Dispose(); dir.Dispose(); }
public override void BeforeClass() { base.BeforeClass(); dir = NewDirectory(); sdir1 = NewDirectory(); sdir2 = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir, new MockAnalyzer(Random)); RandomIndexWriter swriter1 = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, sdir1, new MockAnalyzer(Random)); RandomIndexWriter swriter2 = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, sdir2, new MockAnalyzer(Random)); for (int i = 0; i < 10; i++) { Document doc = new Document(); doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO)); writer.AddDocument(doc); ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc); } writer.ForceMerge(1); swriter1.ForceMerge(1); swriter2.ForceMerge(1); writer.Dispose(); swriter1.Dispose(); swriter2.Dispose(); reader = DirectoryReader.Open(dir); searcher = NewSearcher(reader); multiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(sdir1), DirectoryReader.Open(sdir2) }, true); multiSearcher = NewSearcher(multiReader); multiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(sdir1), DirectoryReader.Open(dir) }, true); multiSearcherDupls = NewSearcher(multiReaderDupls); }
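// The setup above opens three directories plus three readers, and the MultiReader instances
// take ownership of their sub-readers (the `true` flag), so the fixture needs a matching
// teardown. A minimal sketch, assuming the field names used in this class; the real AfterClass
// may dispose things in a different order:
public override void AfterClass()
{
    reader.Dispose();
    multiReader.Dispose();        // also disposes the readers it opened on sdir1/sdir2
    multiReaderDupls.Dispose();   // also disposes its readers on sdir1 and dir
    dir.Dispose();
    sdir1.Dispose();
    sdir2.Dispose();
    base.AfterClass();
}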
public virtual void TestBooleanScorerMax() { Directory dir = NewDirectory(); RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); int docCount = AtLeast(10000); for (int i = 0; i < docCount; i++) { Document doc = new Document(); doc.Add(NewField("field", "a", TextField.TYPE_NOT_STORED)); riw.AddDocument(doc); } riw.ForceMerge(1); IndexReader r = riw.Reader; riw.Dispose(); IndexSearcher s = NewSearcher(r); BooleanQuery bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); bq.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); Weight w = s.CreateNormalizedWeight(bq); Assert.AreEqual(1, s.IndexReader.Leaves.Count); BulkScorer scorer = w.BulkScorer(s.IndexReader.Leaves[0], false, null); FixedBitSet hits = new FixedBitSet(docCount); AtomicInteger end = new AtomicInteger(); Collector c = new CollectorAnonymousInnerClassHelper(this, scorer, hits, end); while (end.Get() < docCount) { int inc = TestUtil.NextInt(Random(), 1, 1000); end.AddAndGet(inc); scorer.Score(c, end.Get()); } Assert.AreEqual(docCount, hits.Cardinality()); r.Dispose(); dir.Dispose(); }
public virtual void TestSumDocFreq_Mem() { int numDocs = AtLeast(500); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); Field id = NewStringField("id", "", Field.Store.NO); Field field1 = NewTextField("foo", "", Field.Store.NO); Field field2 = NewTextField("bar", "", Field.Store.NO); doc.Add(id); doc.Add(field1); doc.Add(field2); for (int i = 0; i < numDocs; i++) { id.StringValue = "" + i; char ch1 = (char)TestUtil.NextInt(Random(), 'a', 'z'); char ch2 = (char)TestUtil.NextInt(Random(), 'a', 'z'); field1.StringValue = "" + ch1 + " " + ch2; ch1 = (char)TestUtil.NextInt(Random(), 'a', 'z'); ch2 = (char)TestUtil.NextInt(Random(), 'a', 'z'); field2.StringValue = "" + ch1 + " " + ch2; writer.AddDocument(doc); } IndexReader ir = writer.Reader; AssertSumDocFreq(ir); ir.Dispose(); int numDeletions = AtLeast(20); for (int i = 0; i < numDeletions; i++) { writer.DeleteDocuments(new Term("id", "" + Random().Next(numDocs))); } writer.ForceMerge(1); writer.Dispose(); ir = DirectoryReader.Open(dir); AssertSumDocFreq(ir); ir.Dispose(); dir.Dispose(); }
public void TestSkipField()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random, dir, Similarity, TimeZone);
    int num = AtLeast(10);
    var terms = new HashSet<Term>();
    for (int i = 0; i < num; i++)
    {
        string field = "field" + Random.Next(100);
        terms.Add(new Term(field, "content1"));
        Document doc = new Document();
        doc.Add(NewStringField(field, "content1", Field.Store.YES));
        w.AddDocument(doc);
    }
    int randomFields = Random.Next(10);
    for (int i = 0; i < randomFields; i++)
    {
        while (true)
        {
            string field = "field" + Random.Next(100);
            Term t = new Term(field, "content1");
            if (!terms.Contains(t))
            {
                terms.Add(t);
                break;
            }
        }
    }
    w.ForceMerge(1);
    IndexReader reader = w.GetReader();
    w.Dispose();
    assertEquals(1, reader.Leaves.size());
    AtomicReaderContext context = reader.Leaves.First();
    TermsFilter tf = new TermsFilter(terms.ToList());
    FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs);
    assertEquals(context.Reader.NumDocs, bits.Cardinality());
    reader.Dispose();
    dir.Dispose();
}
public virtual void TestSimple() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); int numDocs = AtLeast(100); for (int i = 0; i < numDocs; i++) { iw.AddDocument(Doc()); } IndexReader ir = iw.Reader; VerifyCount(ir); ir.Dispose(); iw.ForceMerge(1); ir = iw.Reader; VerifyCount(ir); ir.Dispose(); iw.Dispose(); dir.Dispose(); }
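// Doc() and VerifyCount() are helpers defined elsewhere in this test class and are not shown
// here. A hypothetical sketch of the document factory, just to illustrate the kind of input
// the test feeds the writer (field names and term bounds are assumptions):
private Document Doc()
{
    Document doc = new Document();
    int numFields = TestUtil.NextInt(Random(), 1, 10); // assumed bound
    for (int i = 0; i < numFields; i++)
    {
        // a handful of short random tokens spread over a few field names
        doc.Add(NewStringField("field" + TestUtil.NextInt(Random(), 0, 2), TestUtil.RandomSimpleString(Random()), Field.Store.NO));
    }
    return doc;
}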
public virtual void TestAcrossFields() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, true)); Document doc = new Document(); doc.Add(new TextField("hasMaybepayload", "here we go", Field.Store.YES)); writer.AddDocument(doc); writer.Dispose(); writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, true)); doc = new Document(); doc.Add(new TextField("hasMaybepayload2", "here we go", Field.Store.YES)); writer.AddDocument(doc); writer.AddDocument(doc); writer.ForceMerge(1); writer.Dispose(); dir.Dispose(); }
public virtual void TestBinary() { Directory dir = NewDirectory(); Document doc = new Document(); BytesRef @ref = new BytesRef(); Field field = new BinaryDocValuesField("bytes", @ref); doc.Add(field); IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { @ref.CopyChars(TestUtil.RandomUnicodeString(Random())); iw.AddDocument(doc); if (Random().Next(17) == 0) { iw.Commit(); } } DirectoryReader ir = iw.Reader; iw.ForceMerge(1); DirectoryReader ir2 = iw.Reader; AtomicReader merged = GetOnlySegmentReader(ir2); iw.Dispose(); BinaryDocValues multi = MultiDocValues.GetBinaryValues(ir, "bytes"); BinaryDocValues single = merged.GetBinaryDocValues("bytes"); BytesRef actual = new BytesRef(); BytesRef expected = new BytesRef(); for (int i = 0; i < numDocs; i++) { single.Get(i, expected); multi.Get(i, actual); Assert.AreEqual(expected, actual); } ir.Dispose(); ir2.Dispose(); dir.Dispose(); }
public void TestFieldNotPresent()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    int num = AtLeast(3);
    int skip = Random.Next(num);
    var terms = new JCG.List<Term>();
    for (int i = 0; i < num; i++)
    {
        terms.Add(new Term("field" + i, "content1"));
        Document doc = new Document();
        if (skip == i)
        {
            continue;
        }
        doc.Add(NewStringField("field" + i, "content1", Field.Store.YES));
        w.AddDocument(doc);
    }
    w.ForceMerge(1);
    IndexReader reader = w.GetReader();
    w.Dispose();
    assertEquals(1, reader.Leaves.size());
    AtomicReaderContext context = reader.Leaves[0];
    TermsFilter tf = new TermsFilter(terms);
    FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs);
    assertEquals("Must be num fields - 1 since we skip only one field", num - 1, bits.Cardinality);
    reader.Dispose();
    dir.Dispose();
}
public override void SetUp() { base.SetUp(); directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy())); Document doc = new Document(); doc.Add(NewTextField("field", "one two three four five", Field.Store.YES)); doc.Add(NewTextField("sorter", "b", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(NewTextField("field", "one two three four", Field.Store.YES)); doc.Add(NewTextField("sorter", "d", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(NewTextField("field", "one two three y", Field.Store.YES)); doc.Add(NewTextField("sorter", "a", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(NewTextField("field", "one two x", Field.Store.YES)); doc.Add(NewTextField("sorter", "c", Field.Store.YES)); writer.AddDocument(doc); // tests here require single segment (eg try seed // 8239472272678419952L), because SingleDocTestFilter(x) // blindly accepts that docID in any sub-segment writer.ForceMerge(1); reader = writer.GetReader(); writer.Dispose(); searcher = NewSearcher(reader); query = new TermQuery(new Term("field", "three")); filter = NewStaticFilterB(); }
public override void BeforeClass() { base.BeforeClass(); dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); int numDocs = AtLeast(300); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); AddSome(doc, alwaysTerms); if (Random.Next(100) < 90) { AddSome(doc, commonTerms); } if (Random.Next(100) < 50) { AddSome(doc, mediumTerms); } if (Random.Next(100) < 10) { AddSome(doc, rareTerms); } iw.AddDocument(doc); } iw.ForceMerge(1); iw.Dispose(); r = DirectoryReader.Open(dir); atomicReader = GetOnlySegmentReader(r); searcher = new IndexSearcher(atomicReader); searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper(); }
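// AddSome() and the term arrays (alwaysTerms, commonTerms, mediumTerms, rareTerms) are defined
// elsewhere in the class. A plausible sketch of the helper, assuming the arrays hold plain
// strings and everything goes into a single indexed field named "field"; the real helper may
// sample the terms differently:
private void AddSome(Document doc, string[] values)
{
    int howMany = TestUtil.NextInt32(Random, 1, values.Length);
    for (int i = 0; i < howMany; i++)
    {
        doc.Add(new StringField("field", values[Random.Next(values.Length)], Field.Store.NO));
    }
}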
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwc.SetMergePolicy(NewLogMergePolicy());
    var iw = new RandomIndexWriter(Random, dir, iwc);
    var doc = new Document
    {
        NewStringField("id", "1", Field.Store.YES),
        NewTextField("body", "some contents and more contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 5)
    };
    iw.AddDocument(doc);
    doc = new Document
    {
        NewStringField("id", "2", Field.Store.YES),
        NewTextField("body", "another document with different contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 20)
    };
    iw.AddDocument(doc);
    doc = new Document
    {
        NewStringField("id", "3", Field.Store.YES),
        NewTextField("body", "crappy contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 2)
    };
    iw.AddDocument(doc);
    iw.ForceMerge(1);
    reader = iw.GetReader();
    iw.Dispose();
}
public virtual void TestAddIndexes() { Directory d1 = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), d1, Similarity, TimeZone); Document doc = new Document(); doc.Add(NewStringField("id", "1", Field.Store.YES)); doc.Add(new NumericDocValuesField("dv", 1)); w.AddDocument(doc); IndexReader r1 = w.Reader; w.Dispose(); Directory d2 = NewDirectory(); w = new RandomIndexWriter(Random(), d2, Similarity, TimeZone); doc = new Document(); doc.Add(NewStringField("id", "2", Field.Store.YES)); doc.Add(new NumericDocValuesField("dv", 2)); w.AddDocument(doc); IndexReader r2 = w.Reader; w.Dispose(); Directory d3 = NewDirectory(); w = new RandomIndexWriter(Random(), d3, Similarity, TimeZone); w.AddIndexes(SlowCompositeReaderWrapper.Wrap(r1), SlowCompositeReaderWrapper.Wrap(r2)); r1.Dispose(); d1.Dispose(); r2.Dispose(); d2.Dispose(); w.ForceMerge(1); DirectoryReader r3 = w.Reader; w.Dispose(); AtomicReader sr = GetOnlySegmentReader(r3); Assert.AreEqual(2, sr.NumDocs); NumericDocValues docValues = sr.GetNumericDocValues("dv"); Assert.IsNotNull(docValues); r3.Dispose(); d3.Dispose(); }
public virtual void TestNumerics() { Directory dir = NewDirectory(); Document doc = new Document(); Field field = new NumericDocValuesField("numbers", 0); doc.Add(field); IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { field.LongValue = Random().NextLong(); iw.AddDocument(doc); if (Random().Next(17) == 0) { iw.Commit(); } } DirectoryReader ir = iw.Reader; iw.ForceMerge(1); DirectoryReader ir2 = iw.Reader; AtomicReader merged = GetOnlySegmentReader(ir2); iw.Dispose(); NumericDocValues multi = MultiDocValues.GetNumericValues(ir, "numbers"); NumericDocValues single = merged.GetNumericDocValues("numbers"); for (int i = 0; i < numDocs; i++) { Assert.AreEqual(single.Get(i), multi.Get(i)); } ir.Dispose(); ir2.Dispose(); dir.Dispose(); }
public override void SetUp() { base.SetUp(); directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); //Add series of docs with filterable fields : url, text and dates flags AddDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101"); AddDoc(writer, "http://lucene.apache.org", "New release pending", "20040102"); AddDoc(writer, "http://lucene.apache.org", "Lucene 1.9 out now", "20050101"); AddDoc(writer, "http://www.bar.com", "Local man bites dog", "20040101"); AddDoc(writer, "http://www.bar.com", "Dog bites local man", "20040102"); AddDoc(writer, "http://www.bar.com", "Dog uses Lucene", "20050101"); AddDoc(writer, "http://lucene.apache.org", "Lucene 2.0 out", "20050101"); AddDoc(writer, "http://lucene.apache.org", "Oops. Lucene 2.1 out", "20050102"); // Until we fix LUCENE-2348, the index must // have only 1 segment: writer.ForceMerge(1); reader = writer.Reader; writer.Dispose(); searcher = NewSearcher(reader); }
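// AddDoc() is a small helper defined elsewhere in the class; a hypothetical version matching
// the arguments used above (the field names "url", "text", and "date" are assumptions):
private void AddDoc(RandomIndexWriter writer, string url, string text, string date)
{
    Document doc = new Document();
    doc.Add(NewStringField("url", url, Field.Store.YES));
    doc.Add(NewTextField("text", text, Field.Store.YES));
    doc.Add(NewStringField("date", date, Field.Store.YES));
    writer.AddDocument(doc);
}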
[AwaitsFix(BugUrl = "https://github.com/apache/lucenenet/issues/269")] // LUCENENET TODO: this test fails on x86 on .NET Framework in Release mode only #endif public virtual void TestBS2DisjunctionNextVsAdvance() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, d); int numDocs = AtLeast(300); for (int docUpto = 0; docUpto < numDocs; docUpto++) { string contents = "a"; if (Random.Next(20) <= 16) { contents += " b"; } if (Random.Next(20) <= 8) { contents += " c"; } if (Random.Next(20) <= 4) { contents += " d"; } if (Random.Next(20) <= 2) { contents += " e"; } if (Random.Next(20) <= 1) { contents += " f"; } Document doc = new Document(); doc.Add(new TextField("field", contents, Field.Store.NO)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); w.Dispose(); for (int iter = 0; iter < 10 * RandomMultiplier; iter++) { if (Verbose) { Console.WriteLine("iter=" + iter); } IList <string> terms = new List <string> { "a", "b", "c", "d", "e", "f" }; int numTerms = TestUtil.NextInt32(Random, 1, terms.Count); while (terms.Count > numTerms) { terms.RemoveAt(Random.Next(terms.Count)); } if (Verbose) { Console.WriteLine(" terms=" + terms); } BooleanQuery q = new BooleanQuery(); foreach (string term in terms) { q.Add(new BooleanClause(new TermQuery(new Term("field", term)), Occur.SHOULD)); } Weight weight = s.CreateNormalizedWeight(q); Scorer scorer = weight.GetScorer(s.m_leafContexts[0], null); // First pass: just use .NextDoc() to gather all hits IList <ScoreDoc> hits = new List <ScoreDoc>(); while (scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { hits.Add(new ScoreDoc(scorer.DocID, scorer.GetScore())); } if (Verbose) { Console.WriteLine(" " + hits.Count + " hits"); } // Now, randomly next/advance through the list and // verify exact match: for (int iter2 = 0; iter2 < 10; iter2++) { weight = s.CreateNormalizedWeight(q); scorer = weight.GetScorer(s.m_leafContexts[0], null); if (Verbose) { Console.WriteLine(" iter2=" + iter2); } int upto = -1; while (upto < hits.Count) { int nextUpto; int nextDoc; int left = hits.Count - upto; if (left == 1 || Random.nextBoolean()) { // next nextUpto = 1 + upto; nextDoc = scorer.NextDoc(); } else { // advance int inc = TestUtil.NextInt32(Random, 1, left - 1); nextUpto = inc + upto; nextDoc = scorer.Advance(hits[nextUpto].Doc); } if (nextUpto == hits.Count) { Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, nextDoc); } else { ScoreDoc hit = hits[nextUpto]; Assert.AreEqual(hit.Doc, nextDoc); // Test for precise float equality: Assert.IsTrue(hit.Score == scorer.GetScore(), "doc " + hit.Doc + " has wrong score: expected=" + hit.Score + " actual=" + scorer.GetScore()); } upto = nextUpto; } } } r.Dispose(); d.Dispose(); }
private void CreateRandomIndexes()
{
    dir1 = NewDirectory();
    dir2 = NewDirectory();
    int numDocs = AtLeast(150);
    int numTerms = TestUtil.NextInt32(Random, 1, numDocs / 5);
    ISet<string> randomTerms = new JCG.HashSet<string>();
    while (randomTerms.size() < numTerms)
    {
        randomTerms.add(TestUtil.RandomSimpleString(Random));
    }
    terms = new JCG.List<string>(randomTerms);
    long seed = Random.NextInt64();
    IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
    IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
    iwc2.SetMergePolicy(NewSortingMergePolicy(sort));
    RandomIndexWriter iw1 = new RandomIndexWriter(new Random((int)seed), dir1, iwc1);
    RandomIndexWriter iw2 = new RandomIndexWriter(new Random((int)seed), dir2, iwc2);
    for (int i = 0; i < numDocs; ++i)
    {
        if (Random.nextInt(5) == 0 && i != numDocs - 1)
        {
            string term = RandomPicks.RandomFrom(Random, terms);
            iw1.DeleteDocuments(new Term("s", term));
            iw2.DeleteDocuments(new Term("s", term));
        }
        Document doc = randomDocument();
        iw1.AddDocument(doc);
        iw2.AddDocument(doc);
        if (Random.nextInt(8) == 0)
        {
            iw1.Commit();
            iw2.Commit();
        }
    }
    // Make sure we have something to merge
    iw1.Commit();
    iw2.Commit();
    Document doc2 = randomDocument();
    // NOTE: don't use RIW.addDocument directly, since it sometimes commits,
    // which may trigger a merge, in which case forceMerge may not do anything.
    // With field updates this is a problem, since the updates can go into the
    // single segment in the index, and therefore the index won't be sorted.
    // This hurts the assumption of the test later on, that the index is sorted
    // by SortingMP.
    iw1.IndexWriter.AddDocument(doc2);
    iw2.IndexWriter.AddDocument(doc2);
    if (DefaultCodecSupportsFieldUpdates)
    {
        // update NDV of docs belonging to one term (covers many documents)
        long value = Random.NextInt64();
        string term = RandomPicks.RandomFrom(Random, terms);
        iw1.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
        iw2.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
    }
    iw1.ForceMerge(1);
    iw2.ForceMerge(1);
    iw1.Dispose();
    iw2.Dispose();
    reader = DirectoryReader.Open(dir1);
    sortedReader = DirectoryReader.Open(dir2);
}
public void TestAdvanceSingleParentNoChild() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogDocMergePolicy())); Document parentDoc = new Document(); parentDoc.Add(NewStringField("parent", "1", Field.Store.NO)); parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO)); w.AddDocuments(Arrays.AsList(parentDoc)); // Add another doc so scorer is not null parentDoc = new Document(); parentDoc.Add(NewStringField("parent", "2", Field.Store.NO)); parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO)); Document childDoc = new Document(); childDoc.Add(NewStringField("child", "2", Field.Store.NO)); w.AddDocuments(Arrays.AsList(childDoc, parentDoc)); // Need single seg: w.ForceMerge(1); IndexReader r = w.Reader; w.Dispose(); IndexSearcher s = NewSearcher(r); Query tq = new TermQuery(new Term("child", "2")); Filter parentFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes")))); ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg); Weight weight = s.CreateNormalizedWeight(q); DocIdSetIterator disi = weight.Scorer(s.IndexReader.Leaves.First(), null); assertEquals(2, disi.Advance(0)); r.Dispose(); dir.Dispose(); }
public virtual void Test() { IList<string> postingsList = new List<string>(); int numTerms = AtLeast(300); int maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20); bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field")); IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random())); if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1)) { // Otherwise test can take way too long (> 2 hours) numTerms /= 2; } if (VERBOSE) { Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc); Console.WriteLine("numTerms=" + numTerms); } for (int i = 0; i < numTerms; i++) { string term = Convert.ToString(i); for (int j = 0; j < i; j++) { postingsList.Add(term); } } postingsList = CollectionsHelper.Shuffle(postingsList); ConcurrentQueue<string> postings = new ConcurrentQueue<string>(postingsList); Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName())); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); int threadCount = TestUtil.NextInt(Random(), 1, 5); if (VERBOSE) { Console.WriteLine("config: " + iw.w.Config); Console.WriteLine("threadCount=" + threadCount); } Field prototype = NewTextField("field", "", Field.Store.NO); FieldType fieldType = new FieldType((FieldType)prototype.FieldType); if (Random().NextBoolean()) { fieldType.OmitNorms = true; } int options = Random().Next(3); if (options == 0) { fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; // we dont actually need positions fieldType.StoreTermVectors = true; // but enforce term vectors when we do this so we check SOMETHING } else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field"))) { fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } // else just positions ThreadClass[] threads = new ThreadClass[threadCount]; CountdownEvent startingGun = new CountdownEvent(1); for (int threadID = 0; threadID < threadCount; threadID++) { Random threadRandom = new Random(Random().Next()); Document document = new Document(); Field field = new Field("field", "", fieldType); document.Add(field); threads[threadID] = new ThreadAnonymousInnerClassHelper(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field); threads[threadID].Start(); } startingGun.Signal(); foreach (ThreadClass t in threads) { t.Join(); } iw.ForceMerge(1); DirectoryReader ir = iw.Reader; Assert.AreEqual(1, ir.Leaves.Count); AtomicReader air = (AtomicReader)ir.Leaves[0].Reader; Terms terms = air.Terms("field"); // numTerms-1 because there cannot be a term 0 with 0 postings: Assert.AreEqual(numTerms - 1, terms.Size()); TermsEnum termsEnum = terms.Iterator(null); BytesRef termBR; while ((termBR = termsEnum.Next()) != null) { int value = Convert.ToInt32(termBR.Utf8ToString()); Assert.AreEqual(value, termsEnum.TotalTermFreq()); // don't really need to check more than this, as CheckIndex // will verify that totalTermFreq == total number of positions seen // from a docsAndPositionsEnum. } ir.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void TestForceMergeDeletesMaxSegSize() { Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); TieredMergePolicy tmp = new TieredMergePolicy(); tmp.MaxMergedSegmentMB = 0.01; tmp.ForceMergeDeletesPctAllowed = 0.0; conf.SetMergePolicy(tmp); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, conf); w.RandomForceMerge = false; int numDocs = AtLeast(200); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(NewStringField("id", "" + i, Field.Store.NO)); doc.Add(NewTextField("content", "aaa " + i, Field.Store.NO)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader r = w.Reader; Assert.AreEqual(numDocs, r.MaxDoc); Assert.AreEqual(numDocs, r.NumDocs); r.Dispose(); if (VERBOSE) { Console.WriteLine("\nTEST: delete doc"); } w.DeleteDocuments(new Term("id", "" + (42 + 17))); r = w.Reader; Assert.AreEqual(numDocs, r.MaxDoc); Assert.AreEqual(numDocs - 1, r.NumDocs); r.Dispose(); w.ForceMergeDeletes(); r = w.Reader; Assert.AreEqual(numDocs - 1, r.MaxDoc); Assert.AreEqual(numDocs - 1, r.NumDocs); r.Dispose(); w.Dispose(); dir.Dispose(); }
public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); Document doc = new Document(); doc.Add(NewTextField("field", "one two three four five", Field.Store.YES)); doc.Add(NewTextField("sorter", "b", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(NewTextField("field", "one two three four", Field.Store.YES)); doc.Add(NewTextField("sorter", "d", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(NewTextField("field", "one two three y", Field.Store.YES)); doc.Add(NewTextField("sorter", "a", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(NewTextField("field", "one two x", Field.Store.YES)); doc.Add(NewTextField("sorter", "c", Field.Store.YES)); writer.AddDocument(doc); // tests here require single segment (eg try seed // 8239472272678419952L), because SingleDocTestFilter(x) // blindly accepts that docID in any sub-segment writer.ForceMerge(1); Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); Query = new TermQuery(new Term("field", "three")); Filter = NewStaticFilterB(); }
public virtual void TestRandomStoredFields()
{
    Directory dir = NewDirectory();
    Random rand = Random();
    RandomIndexWriter w = new RandomIndexWriter(rand, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(rand, 5, 20)));
    //w.w.setNoCFSRatio(0.0);
    int docCount = AtLeast(200);
    int fieldCount = TestUtil.NextInt(rand, 1, 5);
    IList<int?> fieldIDs = new List<int?>();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.Tokenized = false;
    Field idField = NewField("id", "", customType);
    for (int i = 0; i < fieldCount; i++)
    {
        fieldIDs.Add(i);
    }
    IDictionary<string, Document> docs = new Dictionary<string, Document>();
    if (VERBOSE)
    {
        Console.WriteLine("TEST: build index docCount=" + docCount);
    }
    FieldType customType2 = new FieldType();
    customType2.Stored = true;
    for (int i = 0; i < docCount; i++)
    {
        Document doc = new Document();
        doc.Add(idField);
        string id = "" + i;
        idField.StringValue = id;
        docs[id] = doc;
        if (VERBOSE)
        {
            Console.WriteLine("TEST: add doc id=" + id);
        }
        foreach (int field in fieldIDs)
        {
            string s;
            if (rand.Next(4) != 3)
            {
                s = TestUtil.RandomUnicodeString(rand, 1000);
                doc.Add(NewField("f" + field, s, customType2));
            }
            else
            {
                s = null;
            }
        }
        w.AddDocument(doc);
        if (rand.Next(50) == 17)
        {
            // mixup binding of field name -> Number every so often
            fieldIDs = CollectionsHelper.Shuffle(fieldIDs);
        }
        if (rand.Next(5) == 3 && i > 0)
        {
            string delID = "" + rand.Next(i);
            if (VERBOSE)
            {
                Console.WriteLine("TEST: delete doc id=" + delID);
            }
            w.DeleteDocuments(new Term("id", delID));
            docs.Remove(delID);
        }
    }
    if (VERBOSE)
    {
        Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields");
    }
    if (docs.Count > 0)
    {
        string[] idsList = docs.Keys.ToArray(/*new string[docs.Count]*/);
        for (int x = 0; x < 2; x++)
        {
            IndexReader r = w.Reader;
            IndexSearcher s = NewSearcher(r);
            if (VERBOSE)
            {
                Console.WriteLine("TEST: cycle x=" + x + " r=" + r);
            }
            int num = AtLeast(1000);
            for (int iter = 0; iter < num; iter++)
            {
                string testID = idsList[rand.Next(idsList.Length)];
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: test id=" + testID);
                }
                TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1);
                Assert.AreEqual(1, hits.TotalHits);
                Document doc = r.Document(hits.ScoreDocs[0].Doc);
                Document docExp = docs[testID];
                for (int i = 0; i < fieldCount; i++)
                {
                    Assert.AreEqual(docExp.Get("f" + i), doc.Get("f" + i), "doc " + testID + ", field f" + fieldCount + " is wrong");
                }
            }
            r.Dispose();
            w.ForceMerge(1);
        }
    }
    w.Dispose();
    dir.Dispose();
}
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass public virtual void TestMerge() { RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 5, 20); int numDocs = AtLeast(100); int numDeletes = Random().Next(numDocs); HashSet<int?> deletes = new HashSet<int?>(); while (deletes.Count < numDeletes) { deletes.Add(Random().Next(numDocs)); } foreach (Options options in ValidOptions()) { RandomDocument[] docs = new RandomDocument[numDocs]; for (int i = 0; i < numDocs; ++i) { docs[i] = docFactory.NewDocument(TestUtil.NextInt(Random(), 1, 3), AtLeast(10), options); } Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, ClassEnvRule.Similarity, ClassEnvRule.TimeZone); for (int i = 0; i < numDocs; ++i) { writer.AddDocument(AddId(docs[i].ToDocument(), "" + i)); if (Rarely()) { writer.Commit(); } } foreach (int delete in deletes) { writer.DeleteDocuments(new Term("id", "" + delete)); } // merge with deletes writer.ForceMerge(1); IndexReader reader = writer.Reader; for (int i = 0; i < numDocs; ++i) { if (!deletes.Contains(i)) { int docID = DocID(reader, "" + i); AssertEquals(docs[i], reader.GetTermVectors(docID)); } } reader.Dispose(); writer.Dispose(); dir.Dispose(); } }
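// AddId() and DocID() are helpers defined elsewhere in this base test case: the first attaches
// the "id" key used for the deletes above, the second resolves it back to a document number.
// A hypothetical sketch of the first helper (the real one may store or index the id differently):
private Document AddId(Document doc, string id)
{
    doc.Add(new StringField("id", id, Field.Store.NO));
    return doc;
}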
public void TestRandomIndex() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); RandomIndexWriter w = new RandomIndexWriter(Random, dir, analyzer); CreateRandomIndex(AtLeast(50), w, Random.NextInt64()); DirectoryReader reader = w.GetReader(); AtomicReader wrapper = SlowCompositeReaderWrapper.Wrap(reader); string field = @"body"; Terms terms = wrapper.GetTerms(field); var lowFreqQueue = new PriorityQueueAnonymousClass(5); var highFreqQueue = new PriorityQueueAnonymousClass1(5); try { TermsEnum iterator = terms.GetEnumerator(); while (iterator.MoveNext()) { if (highFreqQueue.Count < 5) { highFreqQueue.Add(new TermAndFreq( BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq)); lowFreqQueue.Add(new TermAndFreq( BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq)); } else { if (highFreqQueue.Top.freq < iterator.DocFreq) { highFreqQueue.Top.freq = iterator.DocFreq; highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term); highFreqQueue.UpdateTop(); } if (lowFreqQueue.Top.freq > iterator.DocFreq) { lowFreqQueue.Top.freq = iterator.DocFreq; lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term); lowFreqQueue.UpdateTop(); } } } int lowFreq = lowFreqQueue.Top.freq; int highFreq = highFreqQueue.Top.freq; AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq); IList <TermAndFreq> highTerms = QueueToList(highFreqQueue); IList <TermAndFreq> lowTerms = QueueToList(lowFreqQueue); IndexSearcher searcher = NewSearcher(reader); Occur lowFreqOccur = RandomOccur(Random); BooleanQuery verifyQuery = new BooleanQuery(); CommonTermsQuery cq = new CommonTermsQuery(RandomOccur(Random), lowFreqOccur, highFreq - 1, Random.NextBoolean()); foreach (TermAndFreq termAndFreq in lowTerms) { cq.Add(new Term(field, termAndFreq.term)); verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur)); } foreach (TermAndFreq termAndFreq in highTerms) { cq.Add(new Term(field, termAndFreq.term)); } TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc); TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc); assertEquals(verifySearch.TotalHits, cqSearch.TotalHits); var hits = new JCG.HashSet <int>(); foreach (ScoreDoc doc in verifySearch.ScoreDocs) { hits.Add(doc.Doc); } foreach (ScoreDoc doc in cqSearch.ScoreDocs) { assertTrue(hits.Remove(doc.Doc)); } assertTrue(hits.Count == 0); /* * need to force merge here since QueryUtils adds checks based * on leave readers which have different statistics than the top * level reader if we have more than one segment. This could * result in a different query / results. */ w.ForceMerge(1); DirectoryReader reader2 = w.GetReader(); QueryUtils.Check(Random, cq, NewSearcher(reader2)); reader2.Dispose(); } finally { reader.Dispose(); wrapper.Dispose(); w.Dispose(); dir.Dispose(); } }
public static void BeforeClass() { Dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir); int numDocs = AtLeast(300); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); AddSome(doc, AlwaysTerms); if (Random().Next(100) < 90) { AddSome(doc, CommonTerms); } if (Random().Next(100) < 50) { AddSome(doc, MediumTerms); } if (Random().Next(100) < 10) { AddSome(doc, RareTerms); } iw.AddDocument(doc); } iw.ForceMerge(1); iw.Dispose(); r = DirectoryReader.Open(Dir); atomicReader = GetOnlySegmentReader(r); Searcher = new IndexSearcher(atomicReader); Searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper(); }
private void Populate(Directory directory, IndexWriterConfig config) { RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, config); for (int i = 0; i < NUMBER_OF_DOCUMENTS; i++) { Document document = new Document(); for (int f = 0; f < NUMBER_OF_FIELDS; f++) { document.Add(NewStringField("field" + f, Text, Field.Store.NO)); } writer.AddDocument(document); } writer.ForceMerge(1); writer.Dispose(); }
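// Example of how Populate might be driven (a hypothetical caller; SerialMergeScheduler is just
// one scheduler the surrounding tests could exercise, not necessarily the one this class uses):
[Test]
public void TestPopulateWithSerialMergeScheduler()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    config.SetMergeScheduler(new SerialMergeScheduler());
    Populate(directory, config);
    directory.Dispose();
}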
public virtual void TestDifferentTypedDocValuesField2()
{
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d);
    Document doc = new Document();
    // Indexed doc values are single-valued, so we should not
    // be able to add the same field more than once:
    Field f = new NumericDocValuesField("field", 17);
    doc.Add(f);
    doc.Add(new SortedDocValuesField("field", new BytesRef("hello")));
    try
    {
        w.AddDocument(doc);
        Assert.Fail("didn't hit expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected
    }
    doc = new Document();
    doc.Add(f);
    w.AddDocument(doc);
    w.ForceMerge(1);
    DirectoryReader r = w.Reader;
    Assert.AreEqual(17, GetOnlySegmentReader(r).GetNumericDocValues("field").Get(0));
    r.Dispose();
    w.Dispose();
    d.Dispose();
}
/// <summary> /// Indexes at least 1 document with f1, and at least 1 document with f2. /// returns the norms for "field". /// </summary> internal virtual NumericDocValues GetNorms(string field, Field f1, Field f2) { Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, iwc); // add f1 Document d = new Document(); d.Add(f1); riw.AddDocument(d); // add f2 d = new Document(); d.Add(f2); riw.AddDocument(d); // add a mix of f1's and f2's int numExtraDocs = TestUtil.NextInt(Random(), 1, 1000); for (int i = 0; i < numExtraDocs; i++) { d = new Document(); d.Add(Random().NextBoolean() ? f1 : f2); riw.AddDocument(d); } IndexReader ir1 = riw.Reader; // todo: generalize NumericDocValues norms1 = MultiDocValues.GetNormValues(ir1, field); // fully merge and validate MultiNorms against single segment. riw.ForceMerge(1); DirectoryReader ir2 = riw.Reader; NumericDocValues norms2 = GetOnlySegmentReader(ir2).GetNormValues(field); if (norms1 == null) { Assert.IsNull(norms2); } else { for (int docID = 0; docID < ir1.MaxDoc; docID++) { Assert.AreEqual(norms1.Get(docID), norms2.Get(docID)); } } ir1.Dispose(); ir2.Dispose(); riw.Dispose(); dir.Dispose(); return norms1; }
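// A hypothetical caller of GetNorms: index the same field name with and without norms and
// inspect what the merged segment reports. The field text and local names below are assumptions;
// whether norms survive the mix depends on omitNorms semantics, which is what the real tests check.
FieldType noNormsType = new FieldType(TextField.TYPE_NOT_STORED) { OmitNorms = true };
Field withNorms = new TextField("field", "some longer text so norms differ", Field.Store.NO);
Field withoutNorms = new Field("field", "short text", noNormsType);
NumericDocValues norms = GetNorms("field", withNorms, withoutNorms);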
public void TestBigDocuments() { // "big" as "much bigger than the chunk size" // for this test we force a FS dir // we can't just use newFSDirectory, because this test doesn't really index anything. // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484) Directory dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments"))); IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30)); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } Document emptyDoc = new Document(); // emptyDoc Document bigDoc1 = new Document(); // lot of small fields Document bigDoc2 = new Document(); // 1 very big field Field idField = new StringField("id", "", Field.Store.NO); emptyDoc.Add(idField); bigDoc1.Add(idField); bigDoc2.Add(idField); FieldType onlyStored = new FieldType(StringField.TYPE_STORED); onlyStored.Indexed = false; Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored); int numFields = RandomInts.NextIntBetween(Random(), 500000, 1000000); for (int i = 0; i < numFields; ++i) { bigDoc1.Add(smallField); } Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored); bigDoc2.Add(bigField); int numDocs = AtLeast(5); Document[] docs = new Document[numDocs]; for (int i = 0; i < numDocs; ++i) { docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2)); } for (int i = 0; i < numDocs; ++i) { idField.StringValue = "" + i; iw.AddDocument(docs[i]); if (Random().Next(numDocs) == 0) { iw.Commit(); } } iw.Commit(); iw.ForceMerge(1); // look at what happens when big docs are merged DirectoryReader rd = DirectoryReader.Open(dir); IndexSearcher searcher = new IndexSearcher(rd); for (int i = 0; i < numDocs; ++i) { Query query = new TermQuery(new Term("id", "" + i)); TopDocs topDocs = searcher.Search(query, 1); Assert.AreEqual(1, topDocs.TotalHits, "" + i); Document doc = rd.Document(topDocs.ScoreDocs[0].Doc); Assert.IsNotNull(doc); IndexableField[] fieldValues = doc.GetFields("fld"); Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length); if (fieldValues.Length > 0) { Assert.AreEqual(docs[i].GetFields("fld")[0].BinaryValue(), fieldValues[0].BinaryValue()); } } rd.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void TestIntersectStartTerm() { Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetMergePolicy(new LogDocMergePolicy()); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, iwc); Document doc = new Document(); doc.Add(NewStringField("field", "abc", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("field", "abd", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("field", "acd", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("field", "bcd", Field.Store.NO)); w.AddDocument(doc); w.ForceMerge(1); DirectoryReader r = w.Reader; w.Dispose(); AtomicReader sub = GetOnlySegmentReader(r); Terms terms = sub.Fields.Terms("field"); Automaton automaton = (new RegExp(".*d", RegExp.NONE)).ToAutomaton(); CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false); TermsEnum te; // should seek to startTerm te = terms.Intersect(ca, new BytesRef("aad")); Assert.AreEqual("abd", te.Next().Utf8ToString()); Assert.AreEqual(1, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc()); Assert.AreEqual("acd", te.Next().Utf8ToString()); Assert.AreEqual(2, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc()); Assert.AreEqual("bcd", te.Next().Utf8ToString()); Assert.AreEqual(3, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc()); Assert.IsNull(te.Next()); // should fail to find ceil label on second arc, rewind te = terms.Intersect(ca, new BytesRef("add")); Assert.AreEqual("bcd", te.Next().Utf8ToString()); Assert.AreEqual(3, te.Docs(null, null, DocsEnum.FLAG_NONE).NextDoc()); Assert.IsNull(te.Next()); // should reach end te = terms.Intersect(ca, new BytesRef("bcd")); Assert.IsNull(te.Next()); te = terms.Intersect(ca, new BytesRef("ddd")); Assert.IsNull(te.Next()); r.Dispose(); dir.Dispose(); }
public virtual void Test2() { Random random = Random(); int NUM_DOCS = AtLeast(100); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir); bool allowDups = random.NextBoolean(); HashSet<string> seen = new HashSet<string>(); if (VERBOSE) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups); } int numDocs = 0; IList<BytesRef> docValues = new List<BytesRef>(); // TODO: deletions while (numDocs < NUM_DOCS) { string s; if (random.NextBoolean()) { s = TestUtil.RandomSimpleString(random); } else { s = TestUtil.RandomUnicodeString(random); } BytesRef br = new BytesRef(s); if (!allowDups) { if (seen.Contains(s)) { continue; } seen.Add(s); } if (VERBOSE) { Console.WriteLine(" " + numDocs + ": s=" + s); } Document doc = new Document(); doc.Add(new SortedDocValuesField("stringdv", br)); doc.Add(new NumericDocValuesField("id", numDocs)); docValues.Add(br); writer.AddDocument(doc); numDocs++; if (random.Next(40) == 17) { // force flush writer.Reader.Dispose(); } } writer.ForceMerge(1); DirectoryReader r = writer.Reader; writer.Dispose(); AtomicReader sr = GetOnlySegmentReader(r); long END_TIME = DateTime.Now.Millisecond + (TEST_NIGHTLY ? 30 : 1); int NUM_THREADS = TestUtil.NextInt(Random(), 1, 10); ThreadClass[] threads = new ThreadClass[NUM_THREADS]; for (int thread = 0; thread < NUM_THREADS; thread++) { threads[thread] = new ThreadAnonymousInnerClassHelper2(this, random, docValues, sr, END_TIME); threads[thread].Start(); } foreach (ThreadClass thread in threads) { thread.Join(); } r.Dispose(); dir.Dispose(); }
public virtual void TestCustomDoublesValueSource() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); writer.AddDocument(doc); writer.AddDocument(doc); writer.AddDocument(doc); // Test wants 3 docs in one segment: writer.ForceMerge(1); var vs = new ValueSourceAnonymousInnerClassHelper(this, doc); FacetsConfig config = new FacetsConfig(); FacetsCollector fc = new FacetsCollector(); IndexReader r = writer.Reader; IndexSearcher s = NewSearcher(r); s.Search(new MatchAllDocsQuery(), fc); DoubleRange[] ranges = new DoubleRange[] { new DoubleRange("< 1", 0.0, true, 1.0, false), new DoubleRange("< 2", 0.0, true, 2.0, false), new DoubleRange("< 5", 0.0, true, 5.0, false), new DoubleRange("< 10", 0.0, true, 10.0, false), new DoubleRange("< 20", 0.0, true, 20.0, false), new DoubleRange("< 50", 0.0, true, 50.0, false) }; Filter fastMatchFilter; AtomicBoolean filterWasUsed = new AtomicBoolean(); if (Random().NextBoolean()) { // Sort of silly: fastMatchFilter = new CachingWrapperFilterAnonymousInnerClassHelper(this, new QueryWrapperFilter(new MatchAllDocsQuery()), filterWasUsed); } else { fastMatchFilter = null; } if (VERBOSE) { Console.WriteLine("TEST: fastMatchFilter=" + fastMatchFilter); } Facets facets = new DoubleRangeFacetCounts("field", vs, fc, fastMatchFilter, ranges); Assert.AreEqual("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", facets.GetTopChildren(10, "field").ToString()); Assert.True(fastMatchFilter == null || filterWasUsed.Get()); DrillDownQuery ddq = new DrillDownQuery(config); ddq.Add("field", ranges[1].GetFilter(fastMatchFilter, vs)); // Test simple drill-down: Assert.AreEqual(1, s.Search(ddq, 10).TotalHits); // Test drill-sideways after drill-down DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper2(this, s, config, (TaxonomyReader)null, vs, ranges, fastMatchFilter); DrillSidewaysResult dsr = ds.Search(ddq, 10); Assert.AreEqual(1, dsr.Hits.TotalHits); Assert.AreEqual("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); IOUtils.Close(r, writer, dir); }
public virtual void TestStressAdvance_Mem() { for (int iter = 0; iter < 3; iter++) { if (VERBOSE) { Console.WriteLine("\nTEST: iter=" + iter); } Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir); HashSet<int> aDocs = new HashSet<int>(); Documents.Document doc = new Documents.Document(); Field f = NewStringField("field", "", Field.Store.NO); doc.Add(f); Field idField = NewStringField("id", "", Field.Store.YES); doc.Add(idField); int num = AtLeast(4097); if (VERBOSE) { Console.WriteLine("\nTEST: numDocs=" + num); } for (int id = 0; id < num; id++) { if (Random().Next(4) == 3) { f.StringValue = "a"; aDocs.Add(id); } else { f.StringValue = "b"; } idField.StringValue = "" + id; w.AddDocument(doc); if (VERBOSE) { Console.WriteLine("\nTEST: doc upto " + id); } } w.ForceMerge(1); IList<int> aDocIDs = new List<int>(); IList<int> bDocIDs = new List<int>(); DirectoryReader r = w.Reader; int[] idToDocID = new int[r.MaxDoc]; for (int docID = 0; docID < idToDocID.Length; docID++) { int id = Convert.ToInt32(r.Document(docID).Get("id")); if (aDocs.Contains(id)) { aDocIDs.Add(docID); } else { bDocIDs.Add(docID); } } TermsEnum te = GetOnlySegmentReader(r).Fields.Terms("field").Iterator(null); DocsEnum de = null; for (int iter2 = 0; iter2 < 10; iter2++) { if (VERBOSE) { Console.WriteLine("\nTEST: iter=" + iter + " iter2=" + iter2); } Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("a"))); de = TestUtil.Docs(Random(), te, null, de, DocsEnum.FLAG_NONE); TestOne(de, aDocIDs); Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("b"))); de = TestUtil.Docs(Random(), te, null, de, DocsEnum.FLAG_NONE); TestOne(de, bDocIDs); } w.Dispose(); r.Dispose(); dir.Dispose(); } }
public void Test() { RandomIndexWriter writer; DirectoryReader indexReader; int numParents = AtLeast(200); IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); cfg.SetMergePolicy(NewLogMergePolicy()); using (writer = new RandomIndexWriter(Random(), NewDirectory(), cfg)) { Document parentDoc = new Document(); NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L); parentDoc.Add(parentVal); StringField parent = new StringField("parent", "true", Field.Store.YES); parentDoc.Add(parent); for (int i = 0; i < numParents; ++i) { List<Document> documents = new List<Document>(); int numChildren = Random().nextInt(10); for (int j = 0; j < numChildren; ++j) { Document childDoc = new Document(); childDoc.Add(new NumericDocValuesField("child_val", Random().nextInt(5))); documents.Add(childDoc); } parentVal.LongValue = (Random().nextInt(50)); documents.Add(parentDoc); writer.AddDocuments(documents); } writer.ForceMerge(1); indexReader = writer.Reader; } AtomicReader reader = GetOnlySegmentReader(indexReader); Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true")))); FixedBitSet parentBits = (FixedBitSet)parentsFilter.GetDocIdSet(reader.AtomicContext, null); NumericDocValues parentValues = reader.GetNumericDocValues("parent_val"); NumericDocValues childValues = reader.GetNumericDocValues("child_val"); Sort parentSort = new Sort(new SortField("parent_val", SortField.Type_e.LONG)); Sort childSort = new Sort(new SortField("child_val", SortField.Type_e.LONG)); Sort sort = new Sort(new SortField("custom", new BlockJoinComparatorSource(parentsFilter, parentSort, childSort))); Sorter sorter = new Sorter(sort); Sorter.DocMap docMap = sorter.Sort(reader); assertEquals(reader.MaxDoc, docMap.Count); int[] children = new int[1]; int numChildren2 = 0; int previousParent = -1; for (int i = 0; i < docMap.Count; ++i) { int oldID = docMap.NewToOld(i); if (parentBits.Get(oldID)) { // check that we have the right children for (int j = 0; j < numChildren2; ++j) { assertEquals(oldID, parentBits.NextSetBit(children[j])); } // check that children are sorted for (int j = 1; j < numChildren2; ++j) { int doc1 = children[j - 1]; int doc2 = children[j]; if (childValues.Get(doc1) == childValues.Get(doc2)) { assertTrue(doc1 < doc2); // sort is stable } else { assertTrue(childValues.Get(doc1) < childValues.Get(doc2)); } } // check that parents are sorted if (previousParent != -1) { if (parentValues.Get(previousParent) == parentValues.Get(oldID)) { assertTrue(previousParent < oldID); } else { assertTrue(parentValues.Get(previousParent) < parentValues.Get(oldID)); } } // reset previousParent = oldID; numChildren2 = 0; } else { children = ArrayUtil.Grow(children, numChildren2 + 1); children[numChildren2++] = oldID; } } indexReader.Dispose(); writer.w.Directory.Dispose(); }
public void TestBulkMergeWithDeletes() { int numDocs = AtLeast(200); Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); for (int i = 0; i < numDocs; ++i) { Document doc = new Document(); doc.Add(new StringField("id", Convert.ToString(i), Field.Store.YES)); doc.Add(new StoredField("f", TestUtil.RandomSimpleString(Random()))); w.AddDocument(doc); } int deleteCount = TestUtil.NextInt(Random(), 5, numDocs); for (int i = 0; i < deleteCount; ++i) { int id = Random().Next(numDocs); w.DeleteDocuments(new Term("id", Convert.ToString(id))); } w.Commit(); w.Dispose(); w = new RandomIndexWriter(Random(), dir); w.ForceMerge(TestUtil.NextInt(Random(), 1, 3)); w.Commit(); w.Dispose(); TestUtil.CheckIndex(dir); dir.Dispose(); }
public void TestInsideBooleanQuery() { const string idField = "id"; const string toField = "productId"; Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMergePolicy(NewLogMergePolicy())); // 0 Document doc = new Document(); doc.Add(new TextField("description", "random text", Field.Store.NO)); doc.Add(new TextField("name", "name1", Field.Store.NO)); doc.Add(new TextField(idField, "7", Field.Store.NO)); w.AddDocument(doc); // 1 doc = new Document(); doc.Add(new TextField("price", "10.0", Field.Store.NO)); doc.Add(new TextField(idField, "2", Field.Store.NO)); doc.Add(new TextField(toField, "7", Field.Store.NO)); w.AddDocument(doc); // 2 doc = new Document(); doc.Add(new TextField("price", "20.0", Field.Store.NO)); doc.Add(new TextField(idField, "3", Field.Store.NO)); doc.Add(new TextField(toField, "7", Field.Store.NO)); w.AddDocument(doc); // 3 doc = new Document(); doc.Add(new TextField("description", "more random text", Field.Store.NO)); doc.Add(new TextField("name", "name2", Field.Store.NO)); doc.Add(new TextField(idField, "0", Field.Store.NO)); w.AddDocument(doc); w.Commit(); // 4 doc = new Document(); doc.Add(new TextField("price", "10.0", Field.Store.NO)); doc.Add(new TextField(idField, "5", Field.Store.NO)); doc.Add(new TextField(toField, "0", Field.Store.NO)); w.AddDocument(doc); // 5 doc = new Document(); doc.Add(new TextField("price", "20.0", Field.Store.NO)); doc.Add(new TextField(idField, "6", Field.Store.NO)); doc.Add(new TextField(toField, "0", Field.Store.NO)); w.AddDocument(doc); w.ForceMerge(1); IndexSearcher indexSearcher = new IndexSearcher(w.GetReader()); w.Dispose(); // Search for product Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg); BooleanQuery bq = new BooleanQuery(); bq.Add(joinQuery, Occur.SHOULD); bq.Add(new TermQuery(new Term("id", "3")), Occur.SHOULD); indexSearcher.Search(bq, new CollectorAnonymousClass()); indexSearcher.IndexReader.Dispose(); dir.Dispose(); }
public virtual void TestZeroTerms() { var d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), d); Document doc = new Document(); doc.Add(NewTextField("field", "one two three", Field.Store.NO)); doc = new Document(); doc.Add(NewTextField("field2", "one two three", Field.Store.NO)); w.AddDocument(doc); w.Commit(); w.DeleteDocuments(new Term("field", "one")); w.ForceMerge(1); IndexReader r = w.Reader; w.Dispose(); Assert.AreEqual(1, r.NumDocs); Assert.AreEqual(1, r.MaxDoc); Terms terms = MultiFields.GetTerms(r, "field"); if (terms != null) { Assert.IsNull(terms.Iterator(null).Next()); } r.Dispose(); d.Dispose(); }
public virtual void TestSortedSetWithDups() { AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet()); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); int numValues = Random().Next(5); for (int j = 0; j < numValues; j++) { doc.Add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.RandomSimpleString(Random(), 2)))); } iw.AddDocument(doc); if (Random().Next(17) == 0) { iw.Commit(); } } DirectoryReader ir = iw.Reader; iw.ForceMerge(1); DirectoryReader ir2 = iw.Reader; AtomicReader merged = GetOnlySegmentReader(ir2); iw.Dispose(); SortedSetDocValues multi = MultiDocValues.GetSortedSetValues(ir, "bytes"); SortedSetDocValues single = merged.GetSortedSetDocValues("bytes"); if (multi == null) { Assert.IsNull(single); } else { Assert.AreEqual(single.ValueCount, multi.ValueCount); BytesRef actual = new BytesRef(); BytesRef expected = new BytesRef(); // check values for (long i = 0; i < single.ValueCount; i++) { single.LookupOrd(i, expected); multi.LookupOrd(i, actual); Assert.AreEqual(expected, actual); } // check ord list for (int i = 0; i < numDocs; i++) { single.Document = i; List<long?> expectedList = new List<long?>(); long ord; while ((ord = single.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { expectedList.Add(ord); } multi.Document = i; int upto = 0; while ((ord = multi.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { Assert.AreEqual((long)expectedList[upto], ord); upto++; } Assert.AreEqual(expectedList.Count, upto); } } ir.Dispose(); ir2.Dispose(); dir.Dispose(); }
public virtual void TestTermUTF16SortOrder() { Random rnd = Random(); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(rnd, dir, Similarity, TimeZone); Document d = new Document(); /* Single segment */ Field f = NewStringField("f", "", Field.Store.NO); d.Add(f); char[] chars = new char[2]; HashSet<string> allTerms = new HashSet<string>(); int num = AtLeast(200); for (int i = 0; i < num; i++) { string s; if (rnd.NextBoolean()) { /* Single char */ if (rnd.NextBoolean()) { /* Above surrogates */ chars[0] = (char)GetInt(rnd, 1 + UnicodeUtil.UNI_SUR_LOW_END, 0xffff); } else { /* Below surrogates */ chars[0] = (char)GetInt(rnd, 0, UnicodeUtil.UNI_SUR_HIGH_START - 1); } s = new string(chars, 0, 1); } else { /* Surrogate pair */ chars[0] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_HIGH_START, UnicodeUtil.UNI_SUR_HIGH_END); Assert.IsTrue(((int)chars[0]) >= UnicodeUtil.UNI_SUR_HIGH_START && ((int)chars[0]) <= UnicodeUtil.UNI_SUR_HIGH_END); chars[1] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_LOW_START, UnicodeUtil.UNI_SUR_LOW_END); s = new string(chars, 0, 2); } allTerms.Add(s); f.StringValue = s; writer.AddDocument(d); if ((1 + i) % 42 == 0) { writer.Commit(); } } IndexReader r = writer.Reader; /* Test each sub-segment */ foreach (AtomicReaderContext ctx in r.Leaves) { CheckTermsOrder(ctx.Reader, allTerms, false); } /* Test multi segment */ CheckTermsOrder(r, allTerms, true); r.Dispose(); writer.ForceMerge(1); /* Test single segment */ r = writer.Reader; CheckTermsOrder(r, allTerms, true); r.Dispose(); writer.Dispose(); dir.Dispose(); }
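// Why TestTermUTF16SortOrder needs CheckTermsOrder at all: the terms dictionary orders terms by their
// UTF-8 bytes (code point order), which differs from .NET's ordinal UTF-16 ordering once supplementary
// characters (surrogate pairs) are involved. A minimal illustration with assumed example characters:
private static void DemoUtf16VsUtf8SortOrder()
{
    string bmp = "\uFB01";        // U+FB01, a BMP character above the surrogate block
    string supp = "\U0001D11E";   // U+1D11E, stored in UTF-16 as the surrogate pair D834 DD1E

    // Ordinal UTF-16 comparison sees the high surrogate 0xD834 first, so supp sorts before bmp...
    Assert.IsTrue(string.CompareOrdinal(supp, bmp) < 0);

    // ...while byte-wise UTF-8 comparison (BytesRef's default order) follows code points, so supp sorts after bmp.
    Assert.IsTrue(new BytesRef(supp).CompareTo(new BytesRef(bmp)) > 0);
}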
public void TestRandomIndex() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir, analyzer); CreateRandomIndex(AtLeast(50), w, Random.NextInt64()); DirectoryReader reader = w.GetReader(); AtomicReader wrapper = SlowCompositeReaderWrapper.Wrap(reader); string field = @"body"; Terms terms = wrapper.GetTerms(field); var lowFreqQueue = new AnonymousPriorityQueue(this, 5); Util.PriorityQueue<TermAndFreq> highFreqQueue = new AnonymousPriorityQueue1(this, 5); try { TermsEnum iterator = terms.GetIterator(null); while (iterator.Next() != null) { if (highFreqQueue.Count < 5) { highFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq)); lowFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq)); } else { if (highFreqQueue.Top.freq < iterator.DocFreq) { highFreqQueue.Top.freq = iterator.DocFreq; highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term); highFreqQueue.UpdateTop(); } if (lowFreqQueue.Top.freq > iterator.DocFreq) { lowFreqQueue.Top.freq = iterator.DocFreq; lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term); lowFreqQueue.UpdateTop(); } } } int lowFreq = lowFreqQueue.Top.freq; int highFreq = highFreqQueue.Top.freq; AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq); List<TermAndFreq> highTerms = QueueToList(highFreqQueue); List<TermAndFreq> lowTerms = QueueToList(lowFreqQueue); IndexSearcher searcher = NewSearcher(reader); Occur lowFreqOccur = RandomOccur(Random); BooleanQuery verifyQuery = new BooleanQuery(); CommonTermsQuery cq = new CommonTermsQuery(RandomOccur(Random), lowFreqOccur, highFreq - 1, Random.NextBoolean()); foreach (TermAndFreq termAndFreq in lowTerms) { cq.Add(new Term(field, termAndFreq.term)); verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur)); } foreach (TermAndFreq termAndFreq in highTerms) { cq.Add(new Term(field, termAndFreq.term)); } TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc); TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc); assertEquals(verifySearch.TotalHits, cqSearch.TotalHits); var hits = new JCG.HashSet<int>(); foreach (ScoreDoc doc in verifySearch.ScoreDocs) { hits.Add(doc.Doc); } foreach (ScoreDoc doc in cqSearch.ScoreDocs) { assertTrue(hits.Remove(doc.Doc)); } assertTrue(hits.Count == 0); w.ForceMerge(1); DirectoryReader reader2 = w.GetReader(); QueryUtils.Check( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, cq, NewSearcher(reader2)); reader2.Dispose(); } finally { reader.Dispose(); wrapper.Dispose(); w.Dispose(); dir.Dispose(); } }
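// Hedged usage sketch of the query type exercised by TestRandomIndex: CommonTermsQuery buckets its
// terms by document frequency at rewrite time and applies a different Occur to each bucket, which is
// what the test's hand-built BooleanQuery over the low-frequency terms verifies. The field and term
// values below are assumptions; "searcher" is any IndexSearcher over a "body" text field.
private static TopDocs SearchWithCommonTerms(IndexSearcher searcher)
{
    var query = new CommonTermsQuery(
        Occur.SHOULD,   // high-frequency terms only contribute to scoring
        Occur.MUST,     // low-frequency (discriminative) terms are required to match
        3.0f);          // cutoff: terms with docFreq above 3 are treated as high frequency
    query.Add(new Term("body", "the"));
    query.Add(new Term("body", "contents"));
    return searcher.Search(query, 10);
}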
public virtual void TestDocsWithField() { AssumeTrue("codec does not support docsWithField", DefaultCodecSupportsDocsWithField()); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); if (Random().Next(4) > 0) { doc.Add(new NumericDocValuesField("numbers", Random().NextLong())); } doc.Add(new NumericDocValuesField("numbersAlways", Random().NextLong())); iw.AddDocument(doc); if (Random().Next(17) == 0) { iw.Commit(); } } DirectoryReader ir = iw.Reader; iw.ForceMerge(1); DirectoryReader ir2 = iw.Reader; AtomicReader merged = GetOnlySegmentReader(ir2); iw.Dispose(); Bits multi = MultiDocValues.GetDocsWithField(ir, "numbers"); Bits single = merged.GetDocsWithField("numbers"); if (multi == null) { Assert.IsNull(single); } else { Assert.AreEqual(single.Length(), multi.Length()); for (int i = 0; i < numDocs; i++) { Assert.AreEqual(single.Get(i), multi.Get(i)); } } multi = MultiDocValues.GetDocsWithField(ir, "numbersAlways"); single = merged.GetDocsWithField("numbersAlways"); Assert.AreEqual(single.Length(), multi.Length()); for (int i = 0; i < numDocs; i++) { Assert.AreEqual(single.Get(i), multi.Get(i)); } ir.Dispose(); ir2.Dispose(); dir.Dispose(); }
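// A sketch of what the docs-with-field bits are for, mirroring the MultiDocValues calls above: NUMERIC
// doc values return 0 both for "no value indexed" and for an actual 0, so the Bits are needed to tell
// the two apart. The field name "numbers" matches the test; the helper name is illustrative.
private static long SumOfNumbers(IndexReader reader)
{
    NumericDocValues values = MultiDocValues.GetNumericValues(reader, "numbers");
    Bits docsWithField = MultiDocValues.GetDocsWithField(reader, "numbers");
    long sum = 0;
    if (values == null || docsWithField == null)
    {
        return sum;                      // no document in this reader has the field
    }
    for (int docId = 0; docId < reader.MaxDoc; docId++)
    {
        if (docsWithField.Get(docId))    // skip docs where the field is genuinely missing
        {
            sum += values.Get(docId);    // an indexed value, which may legitimately be 0
        }
    }
    return sum;
}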
public virtual void TestMultiValuedDocValuesField() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), d); Document doc = new Document(); Field f = new NumericDocValuesField("field", 17); /* Index doc values are single-valued, so we should not be able to add the same field more than once: */ doc.Add(f); doc.Add(f); try { w.AddDocument(doc); Assert.Fail("didn't hit expected exception"); } catch (System.ArgumentException) { /* expected */ } doc = new Document(); doc.Add(f); w.AddDocument(doc); w.ForceMerge(1); DirectoryReader r = w.Reader; w.Dispose(); Assert.AreEqual(17, FieldCache.DEFAULT.GetInts(GetOnlySegmentReader(r), "field", false).Get(0)); r.Dispose(); d.Dispose(); }
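// Hedged sketch of the alternative the comment in TestMultiValuedDocValuesField points at: NUMERIC doc
// values accept exactly one value per document (adding the field twice makes AddDocument throw), so
// genuinely multi-valued per-document data goes into a SORTED_SET doc values field instead. Field
// names and the writer parameter are assumptions for illustration.
private static void AddMultiValuedDocValues(RandomIndexWriter writer)
{
    Document doc = new Document();
    doc.Add(new SortedSetDocValuesField("tags", new BytesRef("red")));
    doc.Add(new SortedSetDocValuesField("tags", new BytesRef("blue")));   // fine: SORTED_SET is set-valued
    // doc.Add(new NumericDocValuesField("field", 17));
    // doc.Add(new NumericDocValuesField("field", 42));                   // would fail at AddDocument, as the test asserts
    writer.AddDocument(doc);
}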
public void Test() { RandomIndexWriter writer; DirectoryReader indexReader; int numParents = AtLeast(200); IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); cfg.SetMergePolicy(NewLogMergePolicy()); using (writer = new RandomIndexWriter(Random(), NewDirectory(), cfg)) { Document parentDoc = new Document(); NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L); parentDoc.Add(parentVal); StringField parent = new StringField("parent", "true", Field.Store.YES); parentDoc.Add(parent); for (int i = 0; i < numParents; ++i) { List<Document> documents = new List<Document>(); int numChildren = Random().Next(10); for (int j = 0; j < numChildren; ++j) { Document childDoc = new Document(); childDoc.Add(new NumericDocValuesField("child_val", Random().Next(5))); documents.Add(childDoc); } parentVal.SetInt64Value(Random().Next(50)); documents.Add(parentDoc); writer.AddDocuments(documents); } writer.ForceMerge(1); indexReader = writer.Reader; } AtomicReader reader = GetOnlySegmentReader(indexReader); Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true")))); FixedBitSet parentBits = (FixedBitSet)parentsFilter.GetDocIdSet(reader.AtomicContext, null); NumericDocValues parentValues = reader.GetNumericDocValues("parent_val"); NumericDocValues childValues = reader.GetNumericDocValues("child_val"); Sort parentSort = new Sort(new SortField("parent_val", SortFieldType.INT64)); Sort childSort = new Sort(new SortField("child_val", SortFieldType.INT64)); Sort sort = new Sort(new SortField("custom", new BlockJoinComparerSource(parentsFilter, parentSort, childSort))); Sorter sorter = new Sorter(sort); Sorter.DocMap docMap = sorter.Sort(reader); assertEquals(reader.MaxDoc, docMap.Count); int[] children = new int[1]; int numChildren2 = 0; int previousParent = -1; for (int i = 0; i < docMap.Count; ++i) { int oldID = docMap.NewToOld(i); if (parentBits.Get(oldID)) { /* check that we have the right children */ for (int j = 0; j < numChildren2; ++j) { assertEquals(oldID, parentBits.NextSetBit(children[j])); } /* check that children are sorted */ for (int j = 1; j < numChildren2; ++j) { int doc1 = children[j - 1]; int doc2 = children[j]; if (childValues.Get(doc1) == childValues.Get(doc2)) { assertTrue(doc1 < doc2); /* sort is stable */ } else { assertTrue(childValues.Get(doc1) < childValues.Get(doc2)); } } /* check that parents are sorted */ if (previousParent != -1) { if (parentValues.Get(previousParent) == parentValues.Get(oldID)) { assertTrue(previousParent < oldID); } else { assertTrue(parentValues.Get(previousParent) < parentValues.Get(oldID)); } } /* reset */ previousParent = oldID; numChildren2 = 0; } else { children = ArrayUtil.Grow(children, numChildren2 + 1); children[numChildren2++] = oldID; } } indexReader.Dispose(); writer.w.Directory.Dispose(); }
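// A minimal sketch of the block-indexing convention the sorter test depends on: children are added
// first and the parent is the last document of its block, so the "parent" filter marks block
// boundaries and NextSetBit from any child lands on its parent. Field names mirror the test; the
// writer parameter and helper name are assumptions.
private static void AddParentChildBlock(RandomIndexWriter writer, int numChildren, long parentValue)
{
    var block = new List<Document>();
    for (int j = 0; j < numChildren; j++)
    {
        var child = new Document();
        child.Add(new NumericDocValuesField("child_val", j));
        block.Add(child);
    }
    var parent = new Document();
    parent.Add(new StringField("parent", "true", Field.Store.NO));
    parent.Add(new NumericDocValuesField("parent_val", parentValue));
    block.Add(parent);              // the parent must be the last document in the block
    writer.AddDocuments(block);     // indexed atomically as one contiguous block of doc IDs
}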