protected internal RandomDocument(BaseTermVectorsFormatTestCase baseTermVectorsFormatTestCase, int fieldCount, int maxTermCount, Options options, string[] fieldNames, string[] sampleTerms, BytesRef[] sampleTermBytes)
{
    if (fieldCount > fieldNames.Length)
    {
        throw new ArgumentException("fieldCount must not exceed fieldNames.Length");
    }
    this.fieldNames = new string[fieldCount];
    fieldTypes = new FieldType[fieldCount];
    tokenStreams = new RandomTokenStream[fieldCount];
    Arrays.Fill(fieldTypes, baseTermVectorsFormatTestCase.FieldType(options));
    ISet<string> usedFieldNames = new JCG.HashSet<string>();
    for (int i = 0; i < fieldCount; ++i)
    {
        // LUCENENET NOTE: Using a simple LINQ query to filter rather than using brute force makes this a lot
        // faster (and won't infinitely retry due to poor random distribution).
        this.fieldNames[i] = RandomPicks.RandomFrom(Random, fieldNames.Except(usedFieldNames).ToArray());
        //do
        //{
        //    this.FieldNames[i] = RandomPicks.RandomFrom(Random(), fieldNames);
        //} while (usedFieldNames.Contains(this.FieldNames[i]));

        usedFieldNames.Add(this.fieldNames[i]);
        tokenStreams[i] = new RandomTokenStream(baseTermVectorsFormatTestCase, TestUtil.NextInt32(Random, 1, maxTermCount), sampleTerms, sampleTermBytes);
    }
}
private void CreateRandomIndexes(int maxSegments)
{
    dir = NewDirectory();
    numDocs = AtLeast(150);
    int numTerms = TestUtil.NextInt32(Random, 1, numDocs / 5);
    ISet<string> randomTerms = new JCG.HashSet<string>();
    while (randomTerms.Count < numTerms)
    {
        randomTerms.Add(TestUtil.RandomSimpleString(Random));
    }
    terms = new JCG.List<string>(randomTerms);
    long seed = Random.NextInt64();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new J2N.Randomizer(seed)));
    iwc.SetMergePolicy(TestSortingMergePolicy.NewSortingMergePolicy(sort));
    iw = new RandomIndexWriter(new J2N.Randomizer(seed), dir, iwc);
    for (int i = 0; i < numDocs; ++i)
    {
        Document doc = RandomDocument();
        iw.AddDocument(doc);
        if (i == numDocs / 2 || (i != numDocs - 1 && Random.Next(8) == 0))
        {
            iw.Commit();
        }
        if (Random.Next(15) == 0)
        {
            string term = RandomPicks.RandomFrom(Random, terms);
            iw.DeleteDocuments(new Term("s", term));
        }
    }
    reader = iw.GetReader();
}
public virtual void TestNoMergeScheduler_Mem()
{
    MergeScheduler ms = NoMergeScheduler.INSTANCE;
    ms.Dispose();
    ms.Merge(null, RandomPicks.RandomFrom(Random, Enum.GetValues(typeof(MergeTrigger)).Cast<MergeTrigger>().ToArray()), Random.NextBoolean());
}
internal static long RandomLong()
{
    if (Random.NextBoolean())
    {
        // Build a long as a random product of small primes, with a random sign.
        long l = 1;
        if (Random.NextBoolean())
        {
            l *= -1;
        }
        foreach (long i in PRIMES)
        {
            int m = Random.Next(3);
            for (int j = 0; j < m; ++j)
            {
                l *= i;
            }
        }
        return l;
    }
    else if (Random.NextBoolean())
    {
        return Random.NextInt64();
    }
    else
    {
        // Occasionally return an edge-case value.
        return RandomPicks.RandomFrom(Random, Arrays.AsList(long.MinValue, long.MaxValue, 0L, -1L, 1L));
    }
}
// private String fieldName;

//public DistanceStrategyTest(Param param)
//{
//    SpatialStrategy strategy = param.strategy;
//    this.ctx = strategy.SpatialContext;
//    this.strategy = strategy;
//}

public override void SetUp()
{
    SpatialStrategy strategy = ((Param)(RandomPicks.RandomFrom(Random, Parameters()))[0]).strategy;
    this.ctx = strategy.SpatialContext;
    this.strategy = strategy;
    base.SetUp();
}
private Document randomDocument()
{
    Document doc = new Document();
    doc.Add(new NumericDocValuesField("ndv", Random.NextInt64()));
    doc.Add(new StringField("s", RandomPicks.RandomFrom(Random, terms), Field.Store.YES));
    return doc;
}
public virtual void TestUniqueValuesCompression()
{
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    using Directory dir = new RAMDirectory();
    using IndexWriter iwriter = new IndexWriter(dir, iwc);

    int uniqueValueCount = TestUtil.NextInt32(Random, 1, 256);
    IList<long> values = new JCG.List<long>();

    Document doc = new Document();
    NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
    doc.Add(dvf);
    for (int i = 0; i < 300; ++i)
    {
        long value;
        if (values.Count < uniqueValueCount)
        {
            value = Random.NextInt64();
            values.Add(value);
        }
        else
        {
            value = RandomPicks.RandomFrom(Random, values);
        }
        dvf.SetInt64Value(value);
        iwriter.AddDocument(doc);
    }
    iwriter.ForceMerge(1);
    long size1 = DirSize(dir);

    for (int i = 0; i < 20; ++i)
    {
        dvf.SetInt64Value(RandomPicks.RandomFrom(Random, values));
        iwriter.AddDocument(doc);
    }
    iwriter.ForceMerge(1);
    long size2 = DirSize(dir);

    // make sure the new longs did not cost 8 bytes each
    Assert.IsTrue(size2 < size1 + 8 * 20);
}
public void TestEarlyTerminationDifferentSorter()
{
    // test that the collector works correctly when the index was sorted by a
    // different sorter than the one specified in the ctor.
    CreateRandomIndexes(5);
    int numHits = TestUtil.NextInt32(Random, 1, numDocs / 10);
    Sort sort = new Sort(new SortField("ndv2", SortFieldType.INT64, false));
    bool fillFields = Random.NextBoolean();
    bool trackDocScores = Random.NextBoolean();
    bool trackMaxScore = Random.NextBoolean();
    bool inOrder = Random.NextBoolean();

    // LUCENENET specific:
    // we are changing this test to use Lucene.Net 4.9-like behavior rather than going through all of the effort to
    // fix a hard-to-find null reference exception problem, so the collectors are created inside the loop below.
    // https://github.com/apache/lucene-solr/commit/c59f13f9918faeeb4e69acd41731e674ce88f912
    //TopFieldCollector collector1 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);
    //TopFieldCollector collector2 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);

    IndexSearcher searcher = NewSearcher(reader);
    int iters = AtLeast(5);
    for (int i = 0; i < iters; ++i)
    {
        TopFieldCollector collector1 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);
        TopFieldCollector collector2 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);
        TermQuery query = new TermQuery(new Term("s", RandomPicks.RandomFrom(Random, terms)));
        searcher.Search(query, collector1);
        Sort different = new Sort(new SortField("ndv2", SortFieldType.INT64));
        searcher.Search(query, new EarlyTerminatingSortingCollectorHelper(collector2, different, numHits));

        assertTrue(collector1.TotalHits >= collector2.TotalHits);
        AssertTopDocsEquals(collector1.GetTopDocs().ScoreDocs, collector2.GetTopDocs().ScoreDocs);
    }
}
public void TestEarlyTermination_()
{
    CreateRandomIndexes(5);
    int numHits = TestUtil.NextInt32(Random, 1, numDocs / 10);
    Sort sort = new Sort(new SortField("ndv1", SortFieldType.INT64, false));
    bool fillFields = Random.NextBoolean();
    bool trackDocScores = Random.NextBoolean();
    bool trackMaxScore = Random.NextBoolean();
    bool inOrder = Random.NextBoolean();
    TopFieldCollector collector1 = Search.TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);
    TopFieldCollector collector2 = Search.TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);

    IndexSearcher searcher = NewSearcher(reader);
    int iters = AtLeast(5);
    for (int i = 0; i < iters; ++i)
    {
        TermQuery query = new TermQuery(new Term("s", RandomPicks.RandomFrom(Random, terms)));
        searcher.Search(query, collector1);
        searcher.Search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
    }
    assertTrue(collector1.TotalHits >= collector2.TotalHits);
    AssertTopDocsEquals(collector1.GetTopDocs().ScoreDocs, collector2.GetTopDocs().ScoreDocs);
}
public virtual void TestTonsOfUpdates()
{
    // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
    Directory dir = NewDirectory();
    Random random = Random;
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
    IndexWriter writer = new IndexWriter(dir, conf);

    // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
    int numDocs = AtLeast(20000);
    int numBinaryFields = AtLeast(5);
    int numTerms = TestUtil.NextInt32(random, 10, 100); // terms should affect many docs
    ISet<string> updateTerms = new JCG.HashSet<string>();
    while (updateTerms.Count < numTerms)
    {
        updateTerms.Add(TestUtil.RandomSimpleString(random));
    }
    // System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);

    // build a large index with many BDV fields and update terms
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        int numUpdateTerms = TestUtil.NextInt32(random, 1, numTerms / 10);
        for (int j = 0; j < numUpdateTerms; j++)
        {
            doc.Add(new StringField("upd", RandomPicks.RandomFrom(random, updateTerms), Store.NO));
        }
        for (int j = 0; j < numBinaryFields; j++)
        {
            long val = random.Next();
            doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
            doc.Add(new NumericDocValuesField("cf" + j, val * 2));
        }
        writer.AddDocument(doc);
    }

    writer.Commit(); // commit so there's something to apply to

    // set to flush every 2048 bytes (approximately every 12 updates), so we get
    // many flushes during binary updates
    writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
    int numUpdates = AtLeast(100);
    // System.out.println("numUpdates=" + numUpdates);
    for (int i = 0; i < numUpdates; i++)
    {
        int field = random.Next(numBinaryFields);
        Term updateTerm = new Term("upd", RandomPicks.RandomFrom(random, updateTerms));
        long value = random.Next();
        writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
        writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
    }

    writer.Dispose();

    DirectoryReader reader = DirectoryReader.Open(dir);
    BytesRef scratch = new BytesRef();
    foreach (AtomicReaderContext context in reader.Leaves)
    {
        for (int i = 0; i < numBinaryFields; i++)
        {
            AtomicReader r = context.AtomicReader;
            BinaryDocValues f = r.GetBinaryDocValues("f" + i);
            NumericDocValues cf = r.GetNumericDocValues("cf" + i);
            for (int j = 0; j < r.MaxDoc; j++)
            {
                Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
            }
        }
    }

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Pick a random object from the <paramref name="collection"/>.
/// </summary>
public static T NextFrom<T>(this Random random, ICollection<T> collection)
{
    return RandomPicks.RandomFrom(random, collection);
}
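// Example usage of the NextFrom extension above — an illustrative sketch only, not part
// of the original source; the seed and candidate values below are assumptions:
//
//     Random random = new Random(42);
//     ICollection<string> candidates = new List<string> { "apple", "banana", "cherry" };
//     string picked = random.NextFrom(candidates); // delegates to RandomPicks.RandomFrom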
private void CreateRandomIndexes()
{
    dir1 = NewDirectory();
    dir2 = NewDirectory();
    int numDocs = AtLeast(150);
    int numTerms = TestUtil.NextInt32(Random, 1, numDocs / 5);
    ISet<string> randomTerms = new JCG.HashSet<string>();
    while (randomTerms.Count < numTerms)
    {
        randomTerms.Add(TestUtil.RandomSimpleString(Random));
    }
    terms = new JCG.List<string>(randomTerms);
    long seed = Random.NextInt64();
    IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
    IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
    iwc2.SetMergePolicy(NewSortingMergePolicy(sort));
    RandomIndexWriter iw1 = new RandomIndexWriter(new Random((int)seed), dir1, iwc1);
    RandomIndexWriter iw2 = new RandomIndexWriter(new Random((int)seed), dir2, iwc2);
    for (int i = 0; i < numDocs; ++i)
    {
        if (Random.Next(5) == 0 && i != numDocs - 1)
        {
            string term = RandomPicks.RandomFrom(Random, terms);
            iw1.DeleteDocuments(new Term("s", term));
            iw2.DeleteDocuments(new Term("s", term));
        }
        Document doc = randomDocument();
        iw1.AddDocument(doc);
        iw2.AddDocument(doc);
        if (Random.Next(8) == 0)
        {
            iw1.Commit();
            iw2.Commit();
        }
    }
    // Make sure we have something to merge
    iw1.Commit();
    iw2.Commit();
    Document doc2 = randomDocument();
    // NOTE: don't use RIW.AddDocument directly, since it sometimes commits,
    // which may trigger a merge, in which case ForceMerge may not do anything.
    // With field updates this is a problem, since the updates can go into the
    // single segment in the index, and therefore the index won't be sorted.
    // This hurts the assumption of the test later on, that the index is sorted
    // by SortingMP.
    iw1.IndexWriter.AddDocument(doc2);
    iw2.IndexWriter.AddDocument(doc2);

    if (DefaultCodecSupportsFieldUpdates)
    {
        // update NDV of docs belonging to one term (covers many documents)
        long value = Random.NextInt64();
        string term = RandomPicks.RandomFrom(Random, terms);
        iw1.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
        iw2.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
    }

    iw1.ForceMerge(1);
    iw2.ForceMerge(1);
    iw1.Dispose();
    iw2.Dispose();
    reader = DirectoryReader.Open(dir1);
    sortedReader = DirectoryReader.Open(dir2);
}
protected virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
{
    Assert.AreEqual(1, terms.DocCount);
    int termCount = new HashSet<string>(Arrays.AsList(tk.terms)).Count;
    Assert.AreEqual(termCount, terms.Count);
    Assert.AreEqual(termCount, terms.SumDocFreq);
    Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions);
    Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets);
    Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads);
    HashSet<BytesRef> uniqueTerms = new HashSet<BytesRef>();
    foreach (string term in tk.freqs.Keys)
    {
        uniqueTerms.Add(new BytesRef(term));
    }
    BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
    Array.Sort(sortedTerms, terms.Comparer);
    TermsEnum termsEnum = terms.GetIterator(Random.NextBoolean() ? null : this.termsEnum.Value);
    this.termsEnum.Value = termsEnum;
    for (int i = 0; i < sortedTerms.Length; ++i)
    {
        BytesRef nextTerm = termsEnum.Next();
        Assert.AreEqual(sortedTerms[i], nextTerm);
        Assert.AreEqual(sortedTerms[i], termsEnum.Term);
        Assert.AreEqual(1, termsEnum.DocFreq);

        FixedBitSet bits = new FixedBitSet(1);
        DocsEnum docsEnum = termsEnum.Docs(bits, Random.NextBoolean() ? null : this.docsEnum.Value);
        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
        bits.Set(0);

        docsEnum = termsEnum.Docs(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsEnum);
        Assert.IsNotNull(docsEnum);
        Assert.AreEqual(0, docsEnum.NextDoc());
        Assert.AreEqual(0, docsEnum.DocID);
        Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], (int?)docsEnum.Freq);
        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
        this.docsEnum.Value = docsEnum;

        bits.Clear(0);
        DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random.NextBoolean() ? null : this.docsAndPositionsEnum.Value);
        Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
        if (docsAndPositionsEnum != null)
        {
            Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
        }
        bits.Set(0);

        docsAndPositionsEnum = termsEnum.DocsAndPositions(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsAndPositionsEnum);
        Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
        if (terms.HasPositions || terms.HasOffsets)
        {
            Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
            int freq = docsAndPositionsEnum.Freq;
            Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], (int?)freq);
            if (docsAndPositionsEnum != null)
            {
                for (int k = 0; k < freq; ++k)
                {
                    int position = docsAndPositionsEnum.NextPosition();
                    ISet<int?> indexes;
                    if (terms.HasPositions)
                    {
                        indexes = tk.positionToTerms[position];
                        Assert.IsNotNull(indexes);
                    }
                    else
                    {
                        indexes = tk.startOffsetToTerms[docsAndPositionsEnum.StartOffset];
                        Assert.IsNotNull(indexes);
                    }
                    if (terms.HasPositions)
                    {
                        bool foundPosition = false;
                        foreach (int index in indexes)
                        {
                            if (tk.termBytes[index].Equals(termsEnum.Term) && tk.positions[index] == position)
                            {
                                foundPosition = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundPosition);
                    }
                    if (terms.HasOffsets)
                    {
                        bool foundOffset = false;
                        foreach (int index in indexes)
                        {
                            if (tk.termBytes[index].Equals(termsEnum.Term) && tk.startOffsets[index] == docsAndPositionsEnum.StartOffset && tk.endOffsets[index] == docsAndPositionsEnum.EndOffset)
                            {
                                foundOffset = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundOffset);
                    }
                    if (terms.HasPayloads)
                    {
                        bool foundPayload = false;
                        foreach (int index in indexes)
                        {
                            if (tk.termBytes[index].Equals(termsEnum.Term) && Equals(tk.payloads[index], docsAndPositionsEnum.GetPayload()))
                            {
                                foundPayload = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundPayload);
                    }
                }
                try
                {
                    docsAndPositionsEnum.NextPosition();
                    Assert.Fail();
                }
                catch (Exception)
                {
                    // ok - reading past the last position is expected to throw
                }
            }
            Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
        }
        this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
    }
    Assert.IsNull(termsEnum.Next());
    for (int i = 0; i < 5; ++i)
    {
        if (Random.NextBoolean())
        {
            Assert.IsTrue(termsEnum.SeekExact(RandomPicks.RandomFrom(Random, tk.termBytes)));
        }
        else
        {
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomPicks.RandomFrom(Random, tk.termBytes)));
        }
    }
}
protected virtual Options RandomOptions()
{
    return RandomPicks.RandomFrom(Random, new List<Options>(ValidOptions()));
}
public virtual void TestBigDocuments()
{
    // "big" as in "much bigger than the chunk size"
    // for this test we force an FS dir
    // we can't just use NewFSDirectory, because this test doesn't really index anything.
    // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
    using Directory dir = new MockDirectoryWrapper(Random, new MMapDirectory(CreateTempDir("testBigDocuments")));
    IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    //iwConf.SetMaxBufferedDocs(RandomInts.RandomInt32Between(Random, 2, 30));
    // LUCENENET specific - Reduced amount to keep the total
    // Nightly test time under 1 hour
    iwConf.SetMaxBufferedDocs(RandomInts.RandomInt32Between(Random, 2, 15));
    using RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwConf);

    if (dir is MockDirectoryWrapper mockDirectoryWrapper)
    {
        mockDirectoryWrapper.Throttling = Throttling.NEVER;
    }

    Document emptyDoc = new Document(); // emptyDoc
    Document bigDoc1 = new Document(); // lot of small fields
    Document bigDoc2 = new Document(); // 1 very big field

    Field idField = new StringField("id", "", Field.Store.NO);
    emptyDoc.Add(idField);
    bigDoc1.Add(idField);
    bigDoc2.Add(idField);

    FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
    onlyStored.IsIndexed = false;

    Field smallField = new Field("fld", RandomByteArray(Random.Next(10), 256), onlyStored);
    //int numFields = RandomInts.RandomInt32Between(Random, 500000, 1000000);
    // LUCENENET specific - Reduced amount to keep the total
    // Nightly test time under 1 hour
    int numFields = RandomInts.RandomInt32Between(Random, 250000, 500000);
    for (int i = 0; i < numFields; ++i)
    {
        bigDoc1.Add(smallField);
    }

    //Field bigField = new Field("fld", RandomByteArray(RandomInts.RandomInt32Between(Random, 1000000, 5000000), 2), onlyStored);
    // LUCENENET specific - Reduced amount to keep the total
    // Nightly test time under 1 hour
    Field bigField = new Field("fld", RandomByteArray(RandomInts.RandomInt32Between(Random, 500000, 2500000), 2), onlyStored);
    bigDoc2.Add(bigField);

    int numDocs = AtLeast(5);
    Document[] docs = new Document[numDocs];
    for (int i = 0; i < numDocs; ++i)
    {
        docs[i] = RandomPicks.RandomFrom(Random, new Document[] { emptyDoc, bigDoc1, bigDoc2 });
    }
    for (int i = 0; i < numDocs; ++i)
    {
        idField.SetStringValue("" + i);
        iw.AddDocument(docs[i]);
        if (Random.Next(numDocs) == 0)
        {
            iw.Commit();
        }
    }
    iw.Commit();
    iw.ForceMerge(1); // look at what happens when big docs are merged

    using DirectoryReader rd = DirectoryReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(rd);
    for (int i = 0; i < numDocs; ++i)
    {
        Query query = new TermQuery(new Term("id", "" + i));
        TopDocs topDocs = searcher.Search(query, 1);
        Assert.AreEqual(1, topDocs.TotalHits, "" + i);
        Document doc = rd.Document(topDocs.ScoreDocs[0].Doc);
        Assert.IsNotNull(doc);
        IIndexableField[] fieldValues = doc.GetFields("fld");
        Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length);
        if (fieldValues.Length > 0)
        {
            Assert.AreEqual(docs[i].GetFields("fld")[0].GetBinaryValue(), fieldValues[0].GetBinaryValue());
        }
    }
}