protected internal virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms) { Assert.AreEqual(1, terms.DocCount); int termCount = (new HashSet <string>(Arrays.AsList(tk.Terms))).Count; Assert.AreEqual(termCount, terms.Size()); Assert.AreEqual(termCount, terms.SumDocFreq); Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions()); Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets()); Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads()); HashSet <BytesRef> uniqueTerms = new HashSet <BytesRef>(); foreach (string term in tk.Freqs.Keys) { uniqueTerms.Add(new BytesRef(term)); } BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/); Array.Sort(sortedTerms, terms.Comparator); TermsEnum termsEnum = terms.Iterator(Random().NextBoolean() ? null : this.termsEnum.Value); this.termsEnum.Value = termsEnum; for (int i = 0; i < sortedTerms.Length; ++i) { BytesRef nextTerm = termsEnum.Next(); Assert.AreEqual(sortedTerms[i], nextTerm); Assert.AreEqual(sortedTerms[i], termsEnum.Term()); Assert.AreEqual(1, termsEnum.DocFreq()); FixedBitSet bits = new FixedBitSet(1); DocsEnum docsEnum = termsEnum.Docs(bits, Random().NextBoolean() ? null : this.docsEnum.Value); Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc()); bits.Set(0); docsEnum = termsEnum.Docs(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsEnum); Assert.IsNotNull(docsEnum); Assert.AreEqual(0, docsEnum.NextDoc()); Assert.AreEqual(0, docsEnum.DocID()); Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)docsEnum.Freq()); Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc()); this.docsEnum.Value = docsEnum; bits.Clear(0); DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random().NextBoolean() ? null : this.docsAndPositionsEnum.Value); Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null); if (docsAndPositionsEnum != null) { Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc()); } bits.Set(0); docsAndPositionsEnum = termsEnum.DocsAndPositions(Random().NextBoolean() ? bits : null, Random().NextBoolean() ? null : docsAndPositionsEnum); Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null); if (terms.HasPositions() || terms.HasOffsets()) { Assert.AreEqual(0, docsAndPositionsEnum.NextDoc()); int freq = docsAndPositionsEnum.Freq(); Assert.AreEqual(tk.Freqs[termsEnum.Term().Utf8ToString()], (int?)freq); if (docsAndPositionsEnum != null) { for (int k = 0; k < freq; ++k) { int position = docsAndPositionsEnum.NextPosition(); ISet <int?> indexes; if (terms.HasPositions()) { indexes = tk.PositionToTerms[position]; Assert.IsNotNull(indexes); } else { indexes = tk.StartOffsetToTerms[docsAndPositionsEnum.StartOffset()]; Assert.IsNotNull(indexes); } if (terms.HasPositions()) { bool foundPosition = false; foreach (int index in indexes) { if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.Positions[index] == position) { foundPosition = true; break; } } Assert.IsTrue(foundPosition); } if (terms.HasOffsets()) { bool foundOffset = false; foreach (int index in indexes) { if (tk.TermBytes[index].Equals(termsEnum.Term()) && tk.StartOffsets[index] == docsAndPositionsEnum.StartOffset() && tk.EndOffsets[index] == docsAndPositionsEnum.EndOffset()) { foundOffset = true; break; } } Assert.IsTrue(foundOffset); } if (terms.HasPayloads()) { bool foundPayload = false; foreach (int index in indexes) { if (tk.TermBytes[index].Equals(termsEnum.Term()) && Equals(tk.Payloads[index], docsAndPositionsEnum.Payload)) { foundPayload = true; break; } } Assert.IsTrue(foundPayload); } } try { docsAndPositionsEnum.NextPosition(); Assert.Fail(); } catch (Exception e) { // ok } } Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc()); } this.docsAndPositionsEnum.Value = docsAndPositionsEnum; } Assert.IsNull(termsEnum.Next()); for (int i = 0; i < 5; ++i) { if (Random().NextBoolean()) { Assert.IsTrue(termsEnum.SeekExact(RandomInts.RandomFrom(Random(), tk.TermBytes))); } else { Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomInts.RandomFrom(Random(), tk.TermBytes))); } } }
// [Test, LongRunningTest, Timeout(int.MaxValue)] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass public virtual void TestBigDocuments() { // "big" as "much bigger than the chunk size" // for this test we force a FS dir // we can't just use newFSDirectory, because this test doesn't really index anything. // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484) Directory dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments"))); IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30)); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } Document emptyDoc = new Document(); // emptyDoc Document bigDoc1 = new Document(); // lot of small fields Document bigDoc2 = new Document(); // 1 very big field Field idField = new StringField("id", "", Field.Store.NO); emptyDoc.Add(idField); bigDoc1.Add(idField); bigDoc2.Add(idField); FieldType onlyStored = new FieldType(StringField.TYPE_STORED); onlyStored.IsIndexed = false; Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored); int numFields = RandomInts.NextIntBetween(Random(), 500000, 1000000); for (int i = 0; i < numFields; ++i) { bigDoc1.Add(smallField); } Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored); bigDoc2.Add(bigField); int numDocs = AtLeast(5); Document[] docs = new Document[numDocs]; for (int i = 0; i < numDocs; ++i) { docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2)); } for (int i = 0; i < numDocs; ++i) { idField.SetStringValue("" + i); iw.AddDocument(docs[i]); if (Random().Next(numDocs) == 0) { iw.Commit(); } } iw.Commit(); iw.ForceMerge(1); // look at what happens when big docs are merged DirectoryReader rd = DirectoryReader.Open(dir); IndexSearcher searcher = new IndexSearcher(rd); for (int i = 0; i < numDocs; ++i) { Query query = new TermQuery(new Term("id", "" + i)); TopDocs topDocs = searcher.Search(query, 1); Assert.AreEqual(1, topDocs.TotalHits, "" + i); Document doc = rd.Document(topDocs.ScoreDocs[0].Doc); Assert.IsNotNull(doc); IIndexableField[] fieldValues = doc.GetFields("fld"); Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length); if (fieldValues.Length > 0) { Assert.AreEqual(docs[i].GetFields("fld")[0].GetBinaryValue(), fieldValues[0].GetBinaryValue()); } } rd.Dispose(); iw.Dispose(); dir.Dispose(); }
protected internal virtual Options RandomOptions() { return(RandomInts.RandomFrom(Random(), new List <Options>(ValidOptions()))); }
public virtual void TestTonsOfUpdates() { // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM Directory dir = NewDirectory(); Random random = Random(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc IndexWriter writer = new IndexWriter(dir, conf); // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds) int numDocs = AtLeast(20000); int numBinaryFields = AtLeast(5); int numTerms = TestUtil.NextInt(random, 10, 100); // terms should affect many docs HashSet <string> updateTerms = new HashSet <string>(); while (updateTerms.Count < numTerms) { updateTerms.Add(TestUtil.RandomSimpleString(random)); } // System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms); // build a large index with many BDV fields and update terms for (int i = 0; i < numDocs; i++) { Document doc = new Document(); int numUpdateTerms = TestUtil.NextInt(random, 1, numTerms / 10); for (int j = 0; j < numUpdateTerms; j++) { doc.Add(new StringField("upd", RandomInts.RandomFrom(random, updateTerms), Store.NO)); } for (int j = 0; j < numBinaryFields; j++) { long val = random.Next(); doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val))); doc.Add(new NumericDocValuesField("cf" + j, val * 2)); } writer.AddDocument(doc); } writer.Commit(); // commit so there's something to apply to // set to flush every 2048 bytes (approximately every 12 updates), so we get // many flushes during binary updates writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024); int numUpdates = AtLeast(100); // System.out.println("numUpdates=" + numUpdates); for (int i = 0; i < numUpdates; i++) { int field = random.Next(numBinaryFields); Term updateTerm = new Term("upd", RandomInts.RandomFrom(random, updateTerms)); long value = random.Next(); writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value)); writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2); } writer.Dispose(); DirectoryReader reader = DirectoryReader.Open(dir); BytesRef scratch = new BytesRef(); foreach (AtomicReaderContext context in reader.Leaves) { for (int i = 0; i < numBinaryFields; i++) { AtomicReader r = context.AtomicReader; BinaryDocValues f = r.GetBinaryDocValues("f" + i); NumericDocValues cf = r.GetNumericDocValues("cf" + i); for (int j = 0; j < r.MaxDoc; j++) { Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j); } } } reader.Dispose(); dir.Dispose(); }