public virtual void TestClone() { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriterConfig clone = (IndexWriterConfig)conf.Clone(); // Make sure parameters that can't be reused are cloned IndexDeletionPolicy delPolicy = conf.DelPolicy; IndexDeletionPolicy delPolicyClone = clone.DelPolicy; Assert.IsTrue(delPolicy.GetType() == delPolicyClone.GetType() && (delPolicy != delPolicyClone || delPolicy.Clone() == delPolicyClone.Clone())); FlushPolicy flushPolicy = conf.FlushPolicy; FlushPolicy flushPolicyClone = clone.FlushPolicy; Assert.IsTrue(flushPolicy.GetType() == flushPolicyClone.GetType() && (flushPolicy != flushPolicyClone || flushPolicy.Clone() == flushPolicyClone.Clone())); DocumentsWriterPerThreadPool pool = conf.IndexerThreadPool; DocumentsWriterPerThreadPool poolClone = clone.IndexerThreadPool; Assert.IsTrue(pool.GetType() == poolClone.GetType() && (pool != poolClone || pool.Clone() == poolClone.Clone())); MergePolicy mergePolicy = conf.MergePolicy; MergePolicy mergePolicyClone = clone.MergePolicy; Assert.IsTrue(mergePolicy.GetType() == mergePolicyClone.GetType() && (mergePolicy != mergePolicyClone || mergePolicy.Clone() == mergePolicyClone.Clone())); MergeScheduler mergeSched = conf.MergeScheduler; MergeScheduler mergeSchedClone = clone.MergeScheduler; Assert.IsTrue(mergeSched.GetType() == mergeSchedClone.GetType() && (mergeSched != mergeSchedClone || mergeSched.Clone() == mergeSchedClone.Clone())); conf.SetMergeScheduler(new SerialMergeScheduler()); Assert.AreEqual(typeof(ConcurrentMergeScheduler), clone.MergeScheduler.GetType()); }
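// Usage sketch (hedged): the final assertion above passes because setting a new
// scheduler on the original config does not touch the clone. In 4.x an
// IndexWriterConfig may drive only one IndexWriter, so cloning is the way to
// reuse one set of options for several writers (dirA/dirB are assumed
// directories):
IndexWriterConfig baseConf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
using (IndexWriter w1 = new IndexWriter(dirA, (IndexWriterConfig)baseConf.Clone()))
using (IndexWriter w2 = new IndexWriter(dirB, (IndexWriterConfig)baseConf.Clone()))
{
    // each writer receives private clones of the deletion policy, flush policy,
    // merge policy and merge scheduler, as the assertions above verify
}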
public async Task<ActionResult<DndSpell>> updateIndex() { var appLuceneVersion = Lucene.Net.Util.LuceneVersion.LUCENE_48; var indexLocation = @"C:\Index"; using var dir = FSDirectory.Open(indexLocation); var analyzer = new StandardAnalyzer(appLuceneVersion); var indexConfig = new Lucene.Net.Index.IndexWriterConfig(appLuceneVersion, analyzer); using var writer = new IndexWriter(dir, indexConfig); /* dispose the writer so the index write lock is released */ var data = _context.DndSpell.ToList(); foreach (var spell in data) { Document doc = new Document { new StringField("name", spell.Name, Field.Store.YES), new StringField("search_name", spell.Name.ToLower(), Field.Store.YES), new StringField("id", spell.Id.ToString(), Field.Store.YES), /* new StringField("desc", spell.Description.ToLower(), Field.Store.YES), */ new StringField("slug", spell.Slug.ToLower(), Field.Store.YES), }; Console.WriteLine(string.Format("writing {0}", spell.Name)); writer.AddDocument(doc); } writer.Commit(); /* commit once after the loop instead of flushing and committing per document */ return NotFound(); }
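// Hedged follow-up sketch: querying the index written above. The field names
// match the snippet; the path and the search term are assumptions.
using (var dirReader = DirectoryReader.Open(FSDirectory.Open(@"C:\Index")))
{
    var searcher = new IndexSearcher(dirReader);
    TopDocs found = searcher.Search(new TermQuery(new Term("search_name", "fireball")), 10);
    foreach (ScoreDoc sd in found.ScoreDocs)
    {
        Console.WriteLine(searcher.Doc(sd.Doc).Get("name"));
    }
}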
public void TestEmptyChildFilter() { Directory dir = NewDirectory(); IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); config.SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES); // we don't want to merge - since we rely on certain segment setup IndexWriter w = new IndexWriter(dir, config); IList<Document> docs = new List<Document>(); docs.Add(MakeJob("java", 2007)); docs.Add(MakeJob("python", 2010)); docs.Add(MakeResume("Lisa", "United Kingdom")); w.AddDocuments(docs); docs.Clear(); docs.Add(MakeJob("ruby", 2005)); docs.Add(MakeJob("java", 2006)); docs.Add(MakeResume("Frank", "United States")); w.AddDocuments(docs); w.Commit(); int num = AtLeast(10); // produce a segment that doesn't have a value in the docType field for (int i = 0; i < num; i++) { docs.Clear(); docs.Add(MakeJob("java", 2007)); w.AddDocuments(docs); } IndexReader r = DirectoryReader.Open(w, Random().NextBoolean()); w.Dispose(); assertTrue(r.Leaves.size() > 1); IndexSearcher s = new IndexSearcher(r); Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))); BooleanQuery childQuery = new BooleanQuery(); childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST)); childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST)); ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg); BooleanQuery fullQuery = new BooleanQuery(); fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST)); fullQuery.Add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST)); ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true); s.Search(fullQuery, c); TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true); assertFalse(float.IsNaN(results.MaxScore)); assertEquals(1, results.TotalGroupedHitCount); assertEquals(1, results.Groups.Length); IGroupDocs<int> group = results.Groups[0]; Document childDoc = s.Doc(group.ScoreDocs[0].Doc); assertEquals("java", childDoc.Get("skill")); assertNotNull(group.GroupValue); Document parentDoc = s.Doc(group.GroupValue); assertEquals("Lisa", parentDoc.Get("name")); r.Dispose(); dir.Dispose(); }
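// The MakeJob/MakeResume helpers are not shown; a minimal sketch that is
// consistent with the fields the queries above rely on (store flags are
// assumptions):
private Document MakeJob(string skill, int year)
{
    Document job = new Document();
    job.Add(NewStringField("skill", skill, Field.Store.YES));
    job.Add(new IntField("year", year, Field.Store.NO)); // numeric, so NewIntRange can match it
    return job;
}
private Document MakeResume(string name, string country)
{
    Document resume = new Document();
    resume.Add(NewStringField("docType", "resume", Field.Store.NO)); // matched by the parents filter
    resume.Add(NewStringField("name", name, Field.Store.YES));
    resume.Add(NewStringField("country", country, Field.Store.NO));
    return resume;
}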
public virtual void TestDateCompression() { Directory dir = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter iwriter = new IndexWriter(dir, iwc); const long @base = 13; // prime long day = 1000L * 60 * 60 * 24; Document doc = new Document(); NumericDocValuesField dvf = new NumericDocValuesField("dv", 0); doc.Add(dvf); for (int i = 0; i < 300; ++i) { dvf.LongValue = @base + Random().Next(1000) * day; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size1 = DirSize(dir); for (int i = 0; i < 50; ++i) { dvf.LongValue = @base + Random().Next(1000) * day; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size2 = DirSize(dir); // make sure the new longs costed less than if they had only been packed Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8); }
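// DirSize is not shown; it is assumed to simply sum the lengths of all files
// in the directory:
private static long DirSize(Directory d)
{
    long size = 0;
    foreach (string file in d.ListAll())
    {
        size += d.FileLength(file);
    }
    return size;
}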
public virtual void TestInfoStreamGetsFieldName() { Directory dir = NewDirectory(); IndexWriter writer; IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT, new ThrowingAnalyzer()); ByteArrayOutputStream infoBytes = new ByteArrayOutputStream(); StreamWriter infoPrintStream = new StreamWriter(infoBytes, Encoding.UTF8); PrintStreamInfoStream printStreamInfoStream = new PrintStreamInfoStream(infoPrintStream); c.InfoStream = printStreamInfoStream; writer = new IndexWriter(dir, c); Document doc = new Document(); doc.Add(NewField("distinctiveFieldName", "aaa ", StoredTextType)); try { writer.AddDocument(doc); Assert.Fail("Failed to fail."); } catch (BadNews badNews) { infoPrintStream.Flush(); string infoStream = Encoding.UTF8.GetString(infoBytes.GetBuffer()); Assert.IsTrue(infoStream.Contains("distinctiveFieldName")); } writer.Dispose(); dir.Dispose(); }
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) { this.input = input; this.dir1 = dir1; this.dir2 = dir2; this.docsInFirstIndex = docsInFirstIndex; this.config1 = config1; this.config2 = config2; }
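// Hedged usage sketch for the constructor above; the directories, field name
// and split point are assumptions. Documents accepted by the filter go to
// dir1, everything else to dir2:
var splitter = new PKIndexSplitter(input, dir1, dir2,
    new QueryWrapperFilter(new TermRangeQuery("id", null, new BytesRef("5000"), true, false)),
    new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48)),
    new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48)));
splitter.Split();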
public override void SetUp() { Directory = NewDirectory(); IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter indexWriter = new IndexWriter(Directory, config); for (int i = 0; i < AMOUNT_OF_SEGMENTS; i++) { IList<Document> segmentDocs = CreateDocsForSegment(i); indexWriter.AddDocuments(segmentDocs); indexWriter.Commit(); } IndexReader = DirectoryReader.Open(indexWriter, Random().NextBoolean()); indexWriter.Dispose(); IndexSearcher = new IndexSearcher(IndexReader); ParentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new WildcardQuery(new Term("parent", "*")))); }
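// CreateDocsForSegment is not shown; it is assumed to build one parent/child
// block per call, with the parent document last so the WildcardQuery filter
// above can find it (the loop bound is an assumption):
private IList<Document> CreateDocsForSegment(int segmentNumber)
{
    IList<Document> docs = new List<Document>();
    for (int i = 0; i < AMOUNT_OF_DOCS_IN_SEGMENT; i++)
    {
        Document child = new Document();
        child.Add(new StringField("child", segmentNumber + "_" + i, Field.Store.YES));
        docs.Add(child);
    }
    Document parent = new Document();
    parent.Add(new StringField("parent", segmentNumber.ToString(), Field.Store.YES));
    docs.Add(parent); // the parent must be the last document of the block
    return docs;
}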
public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostings")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(scheduler) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE); IndexWriter w = new IndexWriter(dir, config); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.OmitNorms = true; ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; Field field = new Field("field", new MyTokenStream(), ft); doc.Add(field); int numDocs = (int.MaxValue / 26) + 1; for (int i = 0; i < numDocs; i++) { w.AddDocument(doc); if (VERBOSE && i % 100000 == 0) { Console.WriteLine(i + " of " + numDocs + "..."); } } w.ForceMerge(1); w.Dispose(); dir.Dispose(); }
public virtual void TestUniqueValuesCompression() { Directory dir = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter iwriter = new IndexWriter(dir, iwc); int uniqueValueCount = TestUtil.NextInt(Random(), 1, 256); IList<long> values = new List<long>(); Document doc = new Document(); NumericDocValuesField dvf = new NumericDocValuesField("dv", 0); doc.Add(dvf); for (int i = 0; i < 300; ++i) { long value; if (values.Count < uniqueValueCount) { value = Random().NextLong(); values.Add(value); } else { value = RandomInts.RandomFrom(Random(), values); } dvf.LongValue = value; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size1 = DirSize(dir); for (int i = 0; i < 20; ++i) { dvf.LongValue = RandomInts.RandomFrom(Random(), values); iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size2 = DirSize(dir); // make sure the new longs did not cost 8 bytes each Assert.IsTrue(size2 < size1 + 8 * 20); }
public virtual void TestSingleBigValueCompression() { Directory dir = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter iwriter = new IndexWriter(dir, iwc); Document doc = new Document(); NumericDocValuesField dvf = new NumericDocValuesField("dv", 0); doc.Add(dvf); for (int i = 0; i < 20000; ++i) { dvf.LongValue = i & 1023; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size1 = DirSize(dir); dvf.LongValue = long.MaxValue; iwriter.AddDocument(doc); iwriter.ForceMerge(1); long size2 = DirSize(dir); // make sure the new value did not grow the bpv for every other value Assert.IsTrue(size2 < size1 + (20000 * (63 - 10)) / 8); }
public virtual void DoTestNumbers(bool withPayloads) { Directory dir = NewDirectory(); Analyzer analyzer = withPayloads ? (Analyzer)new MockPayloadAnalyzer() : new MockAnalyzer(Random()); Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); Iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc); FieldType ft = new FieldType(TextField.TYPE_STORED); ft.IndexOptionsValue = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (Random().NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = Random().NextBoolean(); ft.StoreTermVectorPositions = Random().NextBoolean(); } int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(new Field("numbers", English.IntToEnglish(i), ft)); doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft)); doc.Add(new StringField("id", "" + i, Field.Store.NO)); w.AddDocument(doc); } IndexReader reader = w.Reader; w.Dispose(); string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" }; foreach (string term in terms) { DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term)); int doc; while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { string storedNumbers = reader.Document(doc).Get("numbers"); int freq = dp.Freq(); for (int i = 0; i < freq; i++) { dp.NextPosition(); int start = dp.StartOffset(); Debug.Assert(start >= 0); int end = dp.EndOffset(); Debug.Assert(end >= 0 && end >= start); // check that the offsets correspond to the term in the src text Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term)); if (withPayloads) { // check that we have a payload and it starts with "pos" Assert.IsNotNull(dp.Payload); BytesRef payload = dp.Payload; Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:")); } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer! } } } // check we can skip correctly int numSkippingTests = AtLeast(50); for (int j = 0; j < numSkippingTests; j++) { int num = TestUtil.NextInt(Random(), 100, Math.Min(numDocs - 1, 999)); DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred")); int doc = dp.Advance(num); Assert.AreEqual(num, doc); int freq = dp.Freq(); for (int i = 0; i < freq; i++) { string storedNumbers = reader.Document(doc).Get("numbers"); dp.NextPosition(); int start = dp.StartOffset(); Debug.Assert(start >= 0); int end = dp.EndOffset(); Debug.Assert(end >= 0 && end >= start); // check that the offsets correspond to the term in the src text Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred")); if (withPayloads) { // check that we have a payload and it starts with "pos" Assert.IsNotNull(dp.Payload); BytesRef payload = dp.Payload; Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:")); } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer! } } // check that other fields (without offsets) work correctly for (int i = 0; i < numDocs; i++) { DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0); Assert.AreEqual(i, dp.NextDoc()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); } reader.Dispose(); dir.Dispose(); }
public virtual void TestDeletes1() { //IndexWriter.debug2 = System.out; Directory dir = new MockDirectoryWrapper(new Random(Random().Next()), new RAMDirectory()); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetMergeScheduler(new SerialMergeScheduler()); iwc.SetMaxBufferedDocs(5000); iwc.SetRAMBufferSizeMB(100); RangeMergePolicy fsmp = new RangeMergePolicy(this, false); iwc.SetMergePolicy(fsmp); IndexWriter writer = new IndexWriter(dir, iwc); for (int x = 0; x < 5; x++) { writer.AddDocument(DocHelper.CreateDocument(x, "1", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } //System.out.println("commit1"); writer.Commit(); Assert.AreEqual(1, writer.SegmentCount); for (int x = 5; x < 10; x++) { writer.AddDocument(DocHelper.CreateDocument(x, "2", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } //System.out.println("commit2"); writer.Commit(); Assert.AreEqual(2, writer.SegmentCount); for (int x = 10; x < 15; x++) { writer.AddDocument(DocHelper.CreateDocument(x, "3", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } writer.DeleteDocuments(new Term("id", "1")); writer.DeleteDocuments(new Term("id", "11")); // flushing without applying deletes means // there will still be deletes in the segment infos writer.Flush(false, false); Assert.IsTrue(writer.BufferedUpdatesStreamAny); // get reader flushes pending deletes // so there should not be anymore IndexReader r1 = writer.Reader; Assert.IsFalse(writer.BufferedUpdatesStreamAny); r1.Dispose(); // delete id:2 from the first segment // merge segments 0 and 1 // which should apply the delete id:2 writer.DeleteDocuments(new Term("id", "2")); writer.Flush(false, false); fsmp = (RangeMergePolicy)writer.Config.MergePolicy; fsmp.DoMerge = true; fsmp.Start = 0; fsmp.Length = 2; writer.MaybeMerge(); Assert.AreEqual(2, writer.SegmentCount); // id:2 shouldn't exist anymore because // it's been applied in the merge and now it's gone IndexReader r2 = writer.Reader; int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2); Assert.IsTrue(id2docs == null); r2.Dispose(); /* /// // added docs are in the ram buffer /// for (int x = 15; x < 20; x++) { /// writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2)); /// System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); /// } /// Assert.IsTrue(writer.numRamDocs() > 0); /// // delete from the ram buffer /// writer.DeleteDocuments(new Term("id", Integer.toString(13))); /// /// Term id3 = new Term("id", Integer.toString(3)); /// /// // delete from the 1st segment /// writer.DeleteDocuments(id3); /// /// Assert.IsTrue(writer.numRamDocs() > 0); /// /// //System.out /// // .println("segdels1:" + writer.docWriter.deletesToString()); /// /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0); /// /// // we cause a merge to happen /// fsmp.doMerge = true; /// fsmp.start = 0; /// fsmp.Length = 2; /// System.out.println("maybeMerge "+writer.SegmentInfos); /// /// SegmentInfo info0 = writer.SegmentInfos.Info(0); /// SegmentInfo info1 = writer.SegmentInfos.Info(1); /// /// writer.MaybeMerge(); /// System.out.println("maybeMerge after "+writer.SegmentInfos); /// // there should be docs in RAM /// Assert.IsTrue(writer.numRamDocs() > 0); /// /// // assert we've merged the 1 and 2 segments /// // and still have a segment leftover == 2 /// Assert.AreEqual(2, writer.SegmentInfos.Size()); /// Assert.IsFalse(segThere(info0, writer.SegmentInfos)); /// 
Assert.IsFalse(segThere(info1, writer.SegmentInfos)); /// /// //System.out.println("segdels2:" + writer.docWriter.deletesToString()); /// /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0); /// /// IndexReader r = writer.GetReader(); /// IndexReader r1 = r.getSequentialSubReaders()[0]; /// printDelDocs(r1.GetLiveDocs()); /// int[] docs = toDocsArray(id3, null, r); /// System.out.println("id3 docs:"+Arrays.toString(docs)); /// // there shouldn't be any docs for id:3 /// Assert.IsTrue(docs == null); /// r.Dispose(); /// /// part2(writer, fsmp); /// */ // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString()); //System.out.println("close"); writer.Dispose(); dir.Dispose(); }
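// ToDocsArray is not shown; it is assumed to walk a DocsEnum for the term and
// return null when nothing matches (type names follow this port's style):
private static int[] ToDocsArray(Term term, IBits liveDocs, IndexReader reader)
{
    var result = new List<int>();
    DocsEnum docsEnum = MultiFields.GetTermDocsEnum(reader, liveDocs, term.Field, term.Bytes);
    if (docsEnum == null)
    {
        return null;
    }
    int docID;
    while ((docID = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        result.Add(docID);
    }
    return result.Count == 0 ? null : result.ToArray();
}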
public virtual void TestSortedSetWithDups() { AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); int numValues = Random.Next(5); for (int j = 0; j < numValues; j++) { doc.Add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.RandomSimpleString(Random, 2)))); } iw.AddDocument(doc); if (Random.Next(17) == 0) { iw.Commit(); } } DirectoryReader ir = iw.GetReader(); iw.ForceMerge(1); DirectoryReader ir2 = iw.GetReader(); AtomicReader merged = GetOnlySegmentReader(ir2); iw.Dispose(); SortedSetDocValues multi = MultiDocValues.GetSortedSetValues(ir, "bytes"); SortedSetDocValues single = merged.GetSortedSetDocValues("bytes"); if (multi == null) { Assert.IsNull(single); } else { Assert.AreEqual(single.ValueCount, multi.ValueCount); BytesRef actual = new BytesRef(); BytesRef expected = new BytesRef(); // check values for (long i = 0; i < single.ValueCount; i++) { single.LookupOrd(i, expected); multi.LookupOrd(i, actual); Assert.AreEqual(expected, actual); } // check ord list for (int i = 0; i < numDocs; i++) { single.SetDocument(i); List <long?> expectedList = new List <long?>(); long ord; while ((ord = single.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { expectedList.Add(ord); } multi.SetDocument(i); int upto = 0; while ((ord = multi.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { Assert.AreEqual((long)expectedList[upto], ord); upto++; } Assert.AreEqual(expectedList.Count, upto); } } ir.Dispose(); ir2.Dispose(); dir.Dispose(); }
protected internal virtual void RunFlushByRam(int numThreads, double maxRamMB, bool ensureNotStalled) { int numDocumentsToIndex = 10 + AtLeast(30); AtomicInt32 numDocs = new AtomicInt32(numDocumentsToIndex); Directory dir = NewDirectory(); MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetFlushPolicy(flushPolicy); int numDWPT = 1 + AtLeast(2); DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT); iwc.SetIndexerThreadPool(threadPool); iwc.SetRAMBufferSizeMB(maxRamMB); iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); IndexWriter writer = new IndexWriter(dir, iwc); flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy; Assert.IsFalse(flushPolicy.FlushOnDocCount); Assert.IsFalse(flushPolicy.FlushOnDeleteTerms); Assert.IsTrue(flushPolicy.FlushOnRAM); DocumentsWriter docsWriter = writer.DocsWriter; Assert.IsNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.flushControl; Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init"); IndexThread[] threads = new IndexThread[numThreads]; for (int x = 0; x < threads.Length; x++) { threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, false); threads[x].Start(); } for (int x = 0; x < threads.Length; x++) { threads[x].Join(); } long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0); Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due numThreads=" + numThreads); Assert.AreEqual(numDocumentsToIndex, writer.NumDocs); Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc); Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark"); AssertActiveBytesAfter(flushControl); if (flushPolicy.HasMarkedPending) { Assert.IsTrue(maxRAMBytes < flushControl.peakActiveBytes); } if (ensureNotStalled) { Assert.IsFalse(docsWriter.flushControl.stallControl.WasStalled); } writer.Dispose(); Assert.AreEqual(0, flushControl.ActiveBytes); dir.Dispose(); }
public virtual void TestFixedBinary([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedBinary")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(scheduler) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE); IndexWriter w = new IndexWriter(dir, config); Document doc = new Document(); var bytes = new byte[4]; BytesRef data = new BytesRef(bytes); BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data); doc.Add(dvField); for (int i = 0; i < int.MaxValue; i++) { bytes[0] = (byte)(i >> 24); bytes[1] = (byte)(i >> 16); bytes[2] = (byte)(i >> 8); bytes[3] = (byte)i; w.AddDocument(doc); if (i % 100000 == 0) { Console.WriteLine("indexed: " + i); Console.Out.Flush(); } } w.ForceMerge(1); w.Dispose(); Console.WriteLine("verifying..."); Console.Out.Flush(); DirectoryReader r = DirectoryReader.Open(dir); int expectedValue = 0; foreach (AtomicReaderContext context in r.Leaves) { AtomicReader reader = context.AtomicReader; BytesRef scratch = new BytesRef(); BinaryDocValues dv = reader.GetBinaryDocValues("dv"); for (int i = 0; i < reader.MaxDoc; i++) { bytes[0] = (byte)(expectedValue >> 24); bytes[1] = (byte)(expectedValue >> 16); bytes[2] = (byte)(expectedValue >> 8); bytes[3] = (byte)expectedValue; dv.Get(i, scratch); Assert.AreEqual(data, scratch); expectedValue++; } } r.Dispose(); dir.Dispose(); }
public virtual void TestTryDeleteDocument() { Directory d = NewDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter w = new IndexWriter(d, iwc); Document doc = new Document(); w.AddDocument(doc); w.AddDocument(doc); w.AddDocument(doc); w.Dispose(); iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND); w = new IndexWriter(d, iwc); IndexReader r = DirectoryReader.Open(w, false); Assert.IsTrue(w.TryDeleteDocument(r, 1)); Assert.IsTrue(w.TryDeleteDocument(r.Leaves[0].Reader, 0)); r.Dispose(); w.Dispose(); r = DirectoryReader.Open(d); Assert.AreEqual(2, r.NumDeletedDocs); Assert.IsNotNull(MultiFields.GetLiveDocs(r)); r.Dispose(); d.Dispose(); }
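// Hedged pattern sketch: TryDeleteDocument succeeds only while the segment the
// reader saw is still live; after a merge it returns false, so callers
// typically fall back to a Term-based delete (field and id are assumptions):
if (!w.TryDeleteDocument(r, docID))
{
    w.DeleteDocuments(new Term("id", id));
}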
public virtual void TestExistingDeletes() { Directory[] dirs = new Directory[2]; for (int i = 0; i < dirs.Length; i++) { dirs[i] = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter writer = new IndexWriter(dirs[i], conf); Document doc = new Document(); doc.Add(new StringField("id", "myid", Field.Store.NO)); writer.AddDocument(doc); writer.Dispose(); } IndexWriterConfig conf_ = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter writer_ = new IndexWriter(dirs[0], conf_); // Now delete the document writer_.DeleteDocuments(new Term("id", "myid")); IndexReader r = DirectoryReader.Open(dirs[1]); try { writer_.AddIndexes(r); } finally { r.Dispose(); } writer_.Commit(); Assert.AreEqual(1, writer_.NumDocs(), "Documents from the incoming index should not have been deleted"); writer_.Dispose(); foreach (Directory dir in dirs) { dir.Dispose(); } }
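// Hedged sketch of the Directory-based overload; like AddIndexes(IndexReader),
// it copies the incoming segments as-is, so the buffered delete above would
// not touch them either:
using (var w = new IndexWriter(dirs[0], NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))))
{
    w.AddIndexes(dirs[1]);
    w.Commit();
}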
public virtual void TestSortedTermsEnum() { Directory directory = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random()); IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwconfig.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, iwconfig); Document doc = new Document(); doc.Add(new StringField("field", "hello", Field.Store.NO)); iwriter.AddDocument(doc); doc = new Document(); doc.Add(new StringField("field", "world", Field.Store.NO)); iwriter.AddDocument(doc); doc = new Document(); doc.Add(new StringField("field", "beer", Field.Store.NO)); iwriter.AddDocument(doc); iwriter.ForceMerge(1); DirectoryReader ireader = iwriter.Reader; iwriter.Dispose(); AtomicReader ar = GetOnlySegmentReader(ireader); SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field"); Assert.AreEqual(3, dv.ValueCount); TermsEnum termsEnum = dv.GetTermsEnum(); // next() Assert.AreEqual("beer", termsEnum.Next().Utf8ToString()); Assert.AreEqual(0, termsEnum.Ord); Assert.AreEqual("hello", termsEnum.Next().Utf8ToString()); Assert.AreEqual(1, termsEnum.Ord); Assert.AreEqual("world", termsEnum.Next().Utf8ToString()); Assert.AreEqual(2, termsEnum.Ord); // seekCeil() Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!"))); Assert.AreEqual("hello", termsEnum.Term.Utf8ToString()); Assert.AreEqual(1, termsEnum.Ord); Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer"))); Assert.AreEqual("beer", termsEnum.Term.Utf8ToString()); Assert.AreEqual(0, termsEnum.Ord); Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz"))); // seekExact() Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer"))); Assert.AreEqual("beer", termsEnum.Term.Utf8ToString()); Assert.AreEqual(0, termsEnum.Ord); Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello"))); Assert.AreEqual("hello", termsEnum.Term.Utf8ToString()); Assert.AreEqual(1, termsEnum.Ord); Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world"))); Assert.AreEqual("world", termsEnum.Term.Utf8ToString()); Assert.AreEqual(2, termsEnum.Ord); Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus"))); // seek(ord) termsEnum.SeekExact(0); Assert.AreEqual("beer", termsEnum.Term.Utf8ToString()); Assert.AreEqual(0, termsEnum.Ord); termsEnum.SeekExact(1); Assert.AreEqual("hello", termsEnum.Term.Utf8ToString()); Assert.AreEqual(1, termsEnum.Ord); termsEnum.SeekExact(2); Assert.AreEqual("world", termsEnum.Term.Utf8ToString()); Assert.AreEqual(2, termsEnum.Ord); ireader.Dispose(); directory.Dispose(); }
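// Hedged sketch: the same SortedSetDocValues API can be used to list the ords
// of a single document (run before the reader is disposed):
dv.SetDocument(0);
long ord;
while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
{
    BytesRef term = new BytesRef();
    dv.LookupOrd(ord, term);
    Console.WriteLine(term.Utf8ToString());
}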
public virtual void TestRandomWithPrefix() { Directory dir = NewDirectory(); HashSet <string> prefixes = new HashSet <string>(); int numPrefix = TestUtil.NextInt(Random(), 2, 7); if (VERBOSE) { Console.WriteLine("TEST: use " + numPrefix + " prefixes"); } while (prefixes.Count < numPrefix) { prefixes.Add(TestUtil.RandomRealisticUnicodeString(Random())); //prefixes.Add(TestUtil.RandomSimpleString(random)); } string[] prefixesArray = prefixes.ToArray(/*new string[prefixes.Count]*/); int NUM_TERMS = AtLeast(20); HashSet <BytesRef> terms = new HashSet <BytesRef>(); while (terms.Count < NUM_TERMS) { string s = prefixesArray[Random().Next(prefixesArray.Length)] + TestUtil.RandomRealisticUnicodeString(Random()); //final String s = prefixesArray[random.nextInt(prefixesArray.Length)] + TestUtil.RandomSimpleString(random); if (s.Length > 0) { terms.Add(new BytesRef(s)); } } BytesRef[] termsArray = terms.ToArray(); Array.Sort(termsArray); int NUM_DOCS = AtLeast(100); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); // Sometimes swap in codec that impls ord(): if (Random().Next(10) == 7) { Codec codec = TestUtil.AlwaysPostingsFormat(PostingsFormat.ForName("Lucene41WithOrds")); conf.SetCodec(codec); } RandomIndexWriter w = new RandomIndexWriter(Random(), dir, conf); int[][] idToOrds = new int[NUM_DOCS][]; HashSet <int?> ordsForDocSet = new HashSet <int?>(); for (int id = 0; id < NUM_DOCS; id++) { Document doc = new Document(); doc.Add(new Int32Field("id", id, Field.Store.NO)); int termCount = TestUtil.NextInt(Random(), 0, 20 * RANDOM_MULTIPLIER); while (ordsForDocSet.Count < termCount) { ordsForDocSet.Add(Random().Next(termsArray.Length)); } int[] ordsForDoc = new int[termCount]; int upto = 0; if (VERBOSE) { Console.WriteLine("TEST: doc id=" + id); } foreach (int ord in ordsForDocSet) { ordsForDoc[upto++] = ord; Field field = NewStringField("field", termsArray[ord].Utf8ToString(), Field.Store.NO); if (VERBOSE) { Console.WriteLine(" f=" + termsArray[ord].Utf8ToString()); } doc.Add(field); } ordsForDocSet.Clear(); Array.Sort(ordsForDoc); idToOrds[id] = ordsForDoc; w.AddDocument(doc); } DirectoryReader r = w.Reader; w.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: reader=" + r); } AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(r); foreach (string prefix in prefixesArray) { BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix); int[][] idToOrdsPrefix = new int[NUM_DOCS][]; for (int id = 0; id < NUM_DOCS; id++) { int[] docOrds = idToOrds[id]; IList <int?> newOrds = new List <int?>(); foreach (int ord in idToOrds[id]) { if (StringHelper.StartsWith(termsArray[ord], prefixRef)) { newOrds.Add(ord); } } int[] newOrdsArray = new int[newOrds.Count]; int upto = 0; foreach (int ord in newOrds) { newOrdsArray[upto++] = ord; } idToOrdsPrefix[id] = newOrdsArray; } foreach (AtomicReaderContext ctx in r.Leaves) { if (VERBOSE) { Console.WriteLine("\nTEST: sub=" + ctx.Reader); } Verify((AtomicReader)ctx.Reader, idToOrdsPrefix, termsArray, prefixRef); } // Also test top-level reader: its enum does not support // ord, so this forces the OrdWrapper to run: if (VERBOSE) { Console.WriteLine("TEST: top reader"); } Verify(slowR, idToOrdsPrefix, termsArray, prefixRef); } FieldCache.DEFAULT.PurgeByCacheKey(slowR.CoreCacheKey); r.Dispose(); dir.Dispose(); }
// Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>()); public virtual void RunTest(string testName) { Failed.Set(false); AddCount.Set(0); DelCount.Set(0); PackCount.Set(0); long t0 = DateTime.Now.Millisecond; Random random = new Random(Random().Next()); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues()); DirectoryInfo tempDir = CreateTempDir(testName); Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW if (Dir is BaseDirectoryWrapper) { ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves. } MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream()); if (LuceneTestCase.TEST_NIGHTLY) { // newIWConfig makes smallish max seg size, which // results in tons and tons of segments for this test // when run nightly: MergePolicy mp = conf.MergePolicy; if (mp is TieredMergePolicy) { ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0; } else if (mp is LogByteSizeMergePolicy) { ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0; } else if (mp is LogMergePolicy) { ((LogMergePolicy)mp).MaxMergeDocs = 100000; } } conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this)); if (VERBOSE) { conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out); } Writer = new IndexWriter(Dir, conf); TestUtil.ReduceOpenFiles(Writer); //TaskScheduler es = Random().NextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName)); TaskScheduler es = null; DoAfterWriter(es); int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4); int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 
300 : RANDOM_MULTIPLIER; ISet <string> delIDs = new ConcurrentHashSet <string>(new HashSet <string>()); ISet <string> delPackIDs = new ConcurrentHashSet <string>(new HashSet <string>()); IList <SubDocs> allSubDocs = new ConcurrentList <SubDocs>(new List <SubDocs>()); long stopTime = DateTime.Now.Millisecond + RUN_TIME_SEC * 1000; ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs); if (VERBOSE) { Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.Now.Millisecond - t0) + " ms]"); } // Let index build up a bit Thread.Sleep(100); DoSearching(es, stopTime); if (VERBOSE) { Console.WriteLine("TEST: all searching done [" + (DateTime.Now.Millisecond - t0) + " ms]"); } for (int thread = 0; thread < indexThreads.Length; thread++) { indexThreads[thread].Join(); } if (VERBOSE) { Console.WriteLine("TEST: done join indexing threads [" + (DateTime.Now.Millisecond - t0) + " ms]; addCount=" + AddCount + " delCount=" + DelCount); } IndexSearcher s = FinalSearcher; if (VERBOSE) { Console.WriteLine("TEST: finalSearcher=" + s); } Assert.IsFalse(Failed.Get()); bool doFail = false; // Verify: make sure delIDs are in fact deleted: foreach (string id in delIDs) { TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc); doFail = true; } } // Verify: make sure delPackIDs are in fact deleted: foreach (string id in delPackIDs) { TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches"); doFail = true; } } // Verify: make sure each group of sub-docs are still in docID order: foreach (SubDocs subDocs in allSubDocs) { TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20); if (!subDocs.Deleted) { // We sort by relevance but the scores should be identical so sort falls back to by docID: if (hits.TotalHits != subDocs.SubIDs.Count) { Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits); doFail = true; } else { int lastDocID = -1; int startDocID = -1; foreach (ScoreDoc scoreDoc in hits.ScoreDocs) { int docID = scoreDoc.Doc; if (lastDocID != -1) { Assert.AreEqual(1 + lastDocID, docID); } else { startDocID = docID; } lastDocID = docID; Document doc = s.Doc(docID); Assert.AreEqual(subDocs.PackID, doc.Get("packID")); } lastDocID = startDocID - 1; foreach (string subID in subDocs.SubIDs) { hits = s.Search(new TermQuery(new Term("docid", subID)), 1); Assert.AreEqual(1, hits.TotalHits); int docID = hits.ScoreDocs[0].Doc; if (lastDocID != -1) { Assert.AreEqual(1 + lastDocID, docID); } lastDocID = docID; } } } else { // Pack was deleted -- make sure its docs are // deleted. 
We can't verify packID is deleted // because we can re-use packID for update: foreach (string subID in subDocs.SubIDs) { Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits); } } } // Verify: make sure all not-deleted docs are in fact // not deleted: int endID = Convert.ToInt32(docs.NextDoc().Get("docid")); docs.Dispose(); for (int id = 0; id < endID; id++) { string stringID = "" + id; if (!delIDs.Contains(stringID)) { TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1); if (hits.TotalHits != 1) { Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + delIDs); doFail = true; } } } Assert.IsFalse(doFail); Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount); ReleaseSearcher(s); Writer.Commit(); Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount); DoClose(); Writer.Dispose(false); // Cannot shutdown until after writer is closed because // writer has merged segment warmer that uses IS to run // searches, and that IS may be using this es! /*if (es != null) * { * es.shutdown(); * es.awaitTermination(1, TimeUnit.SECONDS); * }*/ TestUtil.CheckIndex(Dir); Dir.Dispose(); System.IO.Directory.Delete(tempDir.FullName, true); if (VERBOSE) { Console.WriteLine("TEST: done [" + (DateTime.Now.Millisecond - t0) + " ms]"); } }
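// Note on the timing above: DateTime.Now.Millisecond yields only the 0-999 ms
// component of the current second, so the t0/stopTime arithmetic is
// unreliable; a sketch of the same measurements with a monotonic clock:
var clock = System.Diagnostics.Stopwatch.StartNew();
long stopTimeMs = clock.ElapsedMilliseconds + RUN_TIME_SEC * 1000;
// ... index and search until clock.ElapsedMilliseconds >= stopTimeMs ...
Console.WriteLine("TEST: done [" + clock.ElapsedMilliseconds + " ms]");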
public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER; } var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(OpenMode.CREATE); IndexWriter w = new IndexWriter(dir, config); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = IndexOptions.DOCS_AND_FREQS; ft.OmitNorms = true; MyTokenStream tokenStream = new MyTokenStream(); Field field = new Field("field", tokenStream, ft); doc.Add(field); const int numDocs = 1000; for (int i = 0; i < numDocs; i++) { if (i % 2 == 1) // trick blockPF's little optimization { tokenStream.n = 65536; } else { tokenStream.n = 65537; } w.AddDocument(doc); } w.ForceMerge(1); w.Dispose(); DirectoryReader oneThousand = DirectoryReader.Open(dir); IndexReader[] subReaders = new IndexReader[1000]; Arrays.Fill(subReaders, oneThousand); MultiReader mr = new MultiReader(subReaders); BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2")); if (dir2 is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir2).Throttling = Throttling.NEVER; } IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w2.AddIndexes(mr); w2.ForceMerge(1); w2.Dispose(); oneThousand.Dispose(); DirectoryReader oneMillion = DirectoryReader.Open(dir2); subReaders = new IndexReader[2000]; Arrays.Fill(subReaders, oneMillion); mr = new MultiReader(subReaders); BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3")); if (dir3 is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir3).Throttling = Throttling.NEVER; } IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w3.AddIndexes(mr); w3.ForceMerge(1); w3.Dispose(); oneMillion.Dispose(); dir.Dispose(); dir2.Dispose(); dir3.Dispose(); }
public virtual void TestDeletes1() { //IndexWriter.debug2 = System.out; Directory dir = new MockDirectoryWrapper(new Random(Random.Next()), new RAMDirectory()); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); iwc.SetMergeScheduler(new SerialMergeScheduler()); iwc.SetMaxBufferedDocs(5000); iwc.SetRAMBufferSizeMB(100); RangeMergePolicy fsmp = new RangeMergePolicy(this, false); iwc.SetMergePolicy(fsmp); IndexWriter writer = new IndexWriter(dir, iwc); for (int x = 0; x < 5; x++) { writer.AddDocument(DocHelper.CreateDocument(x, "1", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } //System.out.println("commit1"); writer.Commit(); Assert.AreEqual(1, writer.SegmentCount); for (int x = 5; x < 10; x++) { writer.AddDocument(DocHelper.CreateDocument(x, "2", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } //System.out.println("commit2"); writer.Commit(); Assert.AreEqual(2, writer.SegmentCount); for (int x = 10; x < 15; x++) { writer.AddDocument(DocHelper.CreateDocument(x, "3", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } writer.DeleteDocuments(new Term("id", "1")); writer.DeleteDocuments(new Term("id", "11")); // flushing without applying deletes means // there will still be deletes in the segment infos writer.Flush(false, false); Assert.IsTrue(writer.bufferedUpdatesStream.Any()); // get reader flushes pending deletes // so there should not be anymore IndexReader r1 = writer.GetReader(); Assert.IsFalse(writer.bufferedUpdatesStream.Any()); r1.Dispose(); // delete id:2 from the first segment // merge segments 0 and 1 // which should apply the delete id:2 writer.DeleteDocuments(new Term("id", "2")); writer.Flush(false, false); fsmp = (RangeMergePolicy)writer.Config.MergePolicy; fsmp.DoMerge = true; fsmp.Start = 0; fsmp.Length = 2; writer.MaybeMerge(); Assert.AreEqual(2, writer.SegmentCount); // id:2 shouldn't exist anymore because // it's been applied in the merge and now it's gone IndexReader r2 = writer.GetReader(); int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2); Assert.IsTrue(id2docs == null); r2.Dispose(); /* * /// // added docs are in the ram buffer * /// for (int x = 15; x < 20; x++) { * /// writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2)); * /// System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); * /// } * /// Assert.IsTrue(writer.numRamDocs() > 0); * /// // delete from the ram buffer * /// writer.DeleteDocuments(new Term("id", Integer.toString(13))); * /// * /// Term id3 = new Term("id", Integer.toString(3)); * /// * /// // delete from the 1st segment * /// writer.DeleteDocuments(id3); * /// * /// Assert.IsTrue(writer.numRamDocs() > 0); * /// * /// //System.out * /// // .println("segdels1:" + writer.docWriter.deletesToString()); * /// * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0); * /// * /// // we cause a merge to happen * /// fsmp.doMerge = true; * /// fsmp.start = 0; * /// fsmp.Length = 2; * /// System.out.println("maybeMerge "+writer.SegmentInfos); * /// * /// SegmentInfo info0 = writer.SegmentInfos.Info(0); * /// SegmentInfo info1 = writer.SegmentInfos.Info(1); * /// * /// writer.MaybeMerge(); * /// System.out.println("maybeMerge after "+writer.SegmentInfos); * /// // there should be docs in RAM * /// Assert.IsTrue(writer.numRamDocs() > 0); * /// * /// // assert we've merged the 1 and 2 segments * /// // and still have a segment leftover == 2 * /// Assert.AreEqual(2, writer.SegmentInfos.Size()); 
* /// Assert.IsFalse(segThere(info0, writer.SegmentInfos)); * /// Assert.IsFalse(segThere(info1, writer.SegmentInfos)); * /// * /// //System.out.println("segdels2:" + writer.docWriter.deletesToString()); * /// * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0); * /// * /// IndexReader r = writer.GetReader(); * /// IndexReader r1 = r.getSequentialSubReaders()[0]; * /// printDelDocs(r1.GetLiveDocs()); * /// int[] docs = toDocsArray(id3, null, r); * /// System.out.println("id3 docs:"+Arrays.toString(docs)); * /// // there shouldn't be any docs for id:3 * /// Assert.IsTrue(docs == null); * /// r.Dispose(); * /// * /// part2(writer, fsmp); * /// */ // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString()); //System.out.println("close"); writer.Dispose(); dir.Dispose(); }
public virtual void TestIntersectStartTerm() { Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); iwc.SetMergePolicy(new LogDocMergePolicy()); RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc); Document doc = new Document(); doc.Add(NewStringField("field", "abc", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("field", "abd", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("field", "acd", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("field", "bcd", Field.Store.NO)); w.AddDocument(doc); w.ForceMerge(1); DirectoryReader r = w.GetReader(); w.Dispose(); AtomicReader sub = GetOnlySegmentReader(r); Terms terms = sub.Fields.GetTerms("field"); Automaton automaton = (new RegExp(".*d", RegExpSyntax.NONE)).ToAutomaton(); CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false); TermsEnum te; // should seek to startTerm te = terms.Intersect(ca, new BytesRef("aad")); Assert.AreEqual("abd", te.Next().Utf8ToString()); Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.AreEqual("acd", te.Next().Utf8ToString()); Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.AreEqual("bcd", te.Next().Utf8ToString()); Assert.AreEqual(3, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.IsNull(te.Next()); // should fail to find ceil label on second arc, rewind te = terms.Intersect(ca, new BytesRef("add")); Assert.AreEqual("bcd", te.Next().Utf8ToString()); Assert.AreEqual(3, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.IsNull(te.Next()); // should reach end te = terms.Intersect(ca, new BytesRef("bcd")); Assert.IsNull(te.Next()); te = terms.Intersect(ca, new BytesRef("ddd")); Assert.IsNull(te.Next()); r.Dispose(); dir.Dispose(); }
public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler) { MockDirectoryWrapper dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("4GBStoredFields"))); dir.Throttling = MockDirectoryWrapper.Throttling_e.NEVER; var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(scheduler) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE); IndexWriter w = new IndexWriter(dir, config); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Document doc = new Document(); FieldType ft = new FieldType(); ft.Indexed = false; ft.Stored = true; ft.Freeze(); int valueLength = RandomInts.NextIntBetween(Random(), 1 << 13, 1 << 20); var value = new byte[valueLength]; for (int i = 0; i < valueLength; ++i) { // random so that even compressing codecs can't compress it value[i] = (byte)Random().Next(256); } Field f = new Field("fld", value, ft); doc.Add(f); int numDocs = (int)((1L << 32) / valueLength + 100); for (int i = 0; i < numDocs; ++i) { w.AddDocument(doc); if (VERBOSE && i % (numDocs / 10) == 0) { Console.WriteLine(i + " of " + numDocs + "..."); } } w.ForceMerge(1); w.Dispose(); if (VERBOSE) { bool found = false; foreach (string file in dir.ListAll()) { if (file.EndsWith(".fdt")) { long fileLength = dir.FileLength(file); if (fileLength >= 1L << 32) { found = true; } Console.WriteLine("File length of " + file + " : " + fileLength); } } if (!found) { Console.WriteLine("No .fdt file larger than 4GB, test bug?"); } } DirectoryReader rd = DirectoryReader.Open(dir); Document sd = rd.Document(numDocs - 1); Assert.IsNotNull(sd); Assert.AreEqual(1, sd.Fields.Count); BytesRef valueRef = sd.GetBinaryValue("fld"); Assert.IsNotNull(valueRef); Assert.AreEqual(new BytesRef(value), valueRef); rd.Dispose(); dir.Dispose(); }
public virtual void TestBooleanSpanQuery() { int hits = 0; Directory directory = NewDirectory(); Analyzer indexerAnalyzer = new MockAnalyzer(Random()); IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer); IndexWriter writer = new IndexWriter(directory, config); string FIELD = "content"; Document d = new Document(); d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES)); writer.AddDocument(d); writer.Dispose(); IndexReader indexReader = DirectoryReader.Open(directory); IndexSearcher searcher = NewSearcher(indexReader); DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f); SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork")); SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork")); query.Add(sq1); query.Add(sq2); TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true); searcher.Search(query, collector); TopDocs topDocs = collector.TopDocs(); /* TopDocs() pops the collector's queue, so call it once and reuse the result */ hits = topDocs.ScoreDocs.Length; foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs) { Console.WriteLine(scoreDoc.Doc); } indexReader.Dispose(); Assert.AreEqual(1, hits); /* expected value first, then actual */ directory.Dispose(); }
public virtual void Test() { MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors")); dir.PreventDoubleWrite = false; double rate = Random().NextDouble() * 0.01; //System.out.println("rate=" + rate); dir.RandomIOExceptionRateOnOpen = rate; int iters = AtLeast(20); LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues()); IndexReader r = null; DirectoryReader r2 = null; bool any = false; MockDirectoryWrapper dirCopy = null; int lastNumDocs = 0; for (int iter = 0; iter < iters; iter++) { IndexWriter w = null; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } try { MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); if (VERBOSE) { // Do this ourselves instead of relying on LTC so // we see incrementing messageID: iwc.SetInfoStream(new PrintStreamInfoStream(Console.Out)); } var ms = iwc.MergeScheduler; if (ms is IConcurrentMergeScheduler) { ((IConcurrentMergeScheduler)ms).SetSuppressExceptions(); } w = new IndexWriter(dir, iwc); if (r != null && Random().Next(5) == 3) { if (Random().NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: addIndexes IR[]"); } w.AddIndexes(new IndexReader[] { r }); } else { if (VERBOSE) { Console.WriteLine("TEST: addIndexes Directory[]"); } w.AddIndexes(new Directory[] { dirCopy }); } } else { if (VERBOSE) { Console.WriteLine("TEST: addDocument"); } w.AddDocument(docs.NextDoc()); } dir.RandomIOExceptionRateOnOpen = 0.0; w.Dispose(); w = null; // NOTE: this is O(N^2)! Only enable for temporary debugging: //dir.setRandomIOExceptionRateOnOpen(0.0); //TestUtil.CheckIndex(dir); //dir.setRandomIOExceptionRateOnOpen(rate); // Verify numDocs only increases, to catch IndexWriter // accidentally deleting the index: dir.RandomIOExceptionRateOnOpen = 0.0; Assert.IsTrue(DirectoryReader.IndexExists(dir)); if (r2 == null) { r2 = DirectoryReader.Open(dir); } else { DirectoryReader r3 = DirectoryReader.OpenIfChanged(r2); if (r3 != null) { r2.Dispose(); r2 = r3; } } Assert.IsTrue(r2.NumDocs >= lastNumDocs, "before=" + lastNumDocs + " after=" + r2.NumDocs); lastNumDocs = r2.NumDocs; //System.out.println("numDocs=" + lastNumDocs); dir.RandomIOExceptionRateOnOpen = rate; any = true; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": success"); } } catch (IOException ioe) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": exception"); Console.WriteLine(ioe.ToString()); Console.Write(ioe.StackTrace); } if (w != null) { // NOTE: leave random IO exceptions enabled here, // to verify that rollback does not try to write // anything: w.Rollback(); } } if (any && r == null && Random().NextBoolean()) { // Make a copy of a non-empty index so we can use // it to addIndexes later: dir.RandomIOExceptionRateOnOpen = 0.0; r = DirectoryReader.Open(dir); dirCopy = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors.copy")); HashSet <string> files = new HashSet <string>(); foreach (string file in dir.ListAll()) { dir.Copy(dirCopy, file, file, IOContext.DEFAULT); files.Add(file); } dirCopy.Sync(files); // Have IW kiss the dir so we remove any leftover // files ... 
we can easily have leftover files at // the time we take a copy because we are holding // open a reader: (new IndexWriter(dirCopy, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())))).Dispose(); dirCopy.RandomIOExceptionRate = rate; dir.RandomIOExceptionRateOnOpen = rate; } } if (r2 != null) { r2.Dispose(); } if (r != null) { r.Dispose(); dirCopy.Dispose(); } dir.Dispose(); }
public IndexWriterAnonymousInnerClassHelper(TestIndexWriterDelete outerInstance, Directory dir, IndexWriterConfig setReaderPooling, AtomicInteger docsInSegment, AtomicBoolean closing, AtomicBoolean sawAfterFlush) : base(dir, setReaderPooling) { this.OuterInstance = outerInstance; this.DocsInSegment = docsInSegment; this.Closing = closing; this.SawAfterFlush = sawAfterFlush; }
public virtual void TestStressMultiThreading() { Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); IndexWriter writer = new IndexWriter(dir, conf); // create index int numThreads = TestUtil.NextInt32(Random, 3, 6); int numDocs = AtLeast(2000); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(new StringField("id", "doc" + i, Store.NO)); double group = Random.NextDouble(); string g; if (group < 0.1) { g = "g0"; } else if (group < 0.5) { g = "g1"; } else if (group < 0.8) { g = "g2"; } else { g = "g3"; } doc.Add(new StringField("updKey", g, Store.NO)); for (int j = 0; j < numThreads; j++) { long value = Random.Next(); doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(value))); doc.Add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2 } writer.AddDocument(doc); } CountdownEvent done = new CountdownEvent(numThreads); AtomicInt32 numUpdates = new AtomicInt32(AtLeast(100)); // same thread updates a field as well as reopens ThreadClass[] threads = new ThreadClass[numThreads]; for (int i = 0; i < threads.Length; i++) { string f = "f" + i; string cf = "cf" + i; threads[i] = new ThreadAnonymousInnerClassHelper(this, "UpdateThread-" + i, writer, numDocs, done, numUpdates, f, cf); } foreach (ThreadClass t in threads) { t.Start(); } done.Wait(); writer.Dispose(); DirectoryReader reader = DirectoryReader.Open(dir); BytesRef scratch = new BytesRef(); foreach (AtomicReaderContext context in reader.Leaves) { AtomicReader r = context.AtomicReader; for (int i = 0; i < numThreads; i++) { BinaryDocValues bdv = r.GetBinaryDocValues("f" + i); NumericDocValues control = r.GetNumericDocValues("cf" + i); IBits docsWithBdv = r.GetDocsWithField("f" + i); IBits docsWithControl = r.GetDocsWithField("cf" + i); IBits liveDocs = r.LiveDocs; for (int j = 0; j < r.MaxDoc; j++) { if (liveDocs == null || liveDocs.Get(j)) { Assert.AreEqual(docsWithBdv.Get(j), docsWithControl.Get(j)); if (docsWithBdv.Get(j)) { long ctrlValue = control.Get(j); long bdvValue = TestBinaryDocValuesUpdates.GetValue(bdv, j, scratch) * 2; // if (ctrlValue != bdvValue) { // System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.Document(j).Get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch); // } Assert.AreEqual(ctrlValue, bdvValue); } } } } } reader.Dispose(); dir.Dispose(); }
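// The ThreadAnonymousInnerClassHelper body is not shown; each updater thread
// is assumed to repeat roughly the following until numUpdates is used up
// (reader reopening and error handling elided):
long value = Random.Next();
string g = "g" + Random.Next(4); // pick one of the g0..g3 update keys
writer.UpdateBinaryDocValue(new Term("updKey", g), f, TestBinaryDocValuesUpdates.ToBytes(value));
writer.UpdateNumericDocValue(new Term("updKey", g), cf, value * 2); // keep the control field at f * 2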
/// <summary> /// Creates an index upgrader on the given directory, using an <see cref="IndexWriter"/> with the given /// config. Indexes with multiple commit points can also be upgraded by removing all commits older /// than the last one. /// </summary> public IndexUpgrader(Directory dir, IndexWriterConfig iwc, bool deletePriorCommits) { this.Dir = dir; this.Iwc = iwc; this.DeletePriorCommits = deletePriorCommits; }
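// Hedged usage sketch (path assumed): rewrite every segment into the current
// index format, keeping only the most recent commit:
var dir = FSDirectory.Open(@"C:\old-index");
var iwc = new IndexWriterConfig(LuceneVersion.LUCENE_48, null); // no analyzer needed for upgrading
new IndexUpgrader(dir, iwc, deletePriorCommits: true).Upgrade();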
public virtual void TestManyReopensAndFields() { Directory dir = NewDirectory(); Random random = Random; IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); LogMergePolicy lmp = NewLogMergePolicy(); lmp.MergeFactor = 3; // merge often conf.SetMergePolicy(lmp); IndexWriter writer = new IndexWriter(dir, conf); bool isNRT = random.NextBoolean(); DirectoryReader reader; if (isNRT) { reader = DirectoryReader.Open(writer, true); } else { writer.Commit(); reader = DirectoryReader.Open(dir); } int numFields = random.Next(4) + 3; // 3-6 int numNDVFields = random.Next(numFields / 2) + 1; // 1-3 long[] fieldValues = new long[numFields]; bool[] fieldHasValue = new bool[numFields]; Arrays.Fill(fieldHasValue, true); for (int i = 0; i < fieldValues.Length; i++) { fieldValues[i] = 1; } int numRounds = AtLeast(15); int docID = 0; for (int i = 0; i < numRounds; i++) { int numDocs = AtLeast(5); // System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs); for (int j = 0; j < numDocs; j++) { Document doc = new Document(); doc.Add(new StringField("id", "doc-" + docID, Store.NO)); doc.Add(new StringField("key", "all", Store.NO)); // update key // add all fields with their current value for (int f = 0; f < fieldValues.Length; f++) { if (f < numNDVFields) { doc.Add(new NumericDocValuesField("f" + f, fieldValues[f])); } else { doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f]))); } } writer.AddDocument(doc); ++docID; } // if a field's value was unset before, unset it in all newly added documents too for (int field = 0; field < fieldHasValue.Length; field++) { if (!fieldHasValue[field]) { if (field < numNDVFields) { writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null); } else { writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null); } } } int fieldIdx = random.Next(fieldValues.Length); string updateField = "f" + fieldIdx; if (random.NextBoolean()) { // System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'"); fieldHasValue[fieldIdx] = false; if (fieldIdx < numNDVFields) { writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null); } else { writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null); } } else { fieldHasValue[fieldIdx] = true; if (fieldIdx < numNDVFields) { writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]); } else { writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx])); } // System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]); } if (random.NextDouble() < 0.2) { int deleteDoc = random.Next(docID); // might also delete an already deleted document, ok! 
writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc)); // System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc); } // verify reader if (!isNRT) { writer.Commit(); } // System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader); DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader); Assert.IsNotNull(newReader); reader.Dispose(); reader = newReader; // System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader); Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round BytesRef scratch = new BytesRef(); foreach (AtomicReaderContext context in reader.Leaves) { AtomicReader r = context.AtomicReader; // System.out.println(((SegmentReader) r).getSegmentName()); IBits liveDocs = r.LiveDocs; for (int field = 0; field < fieldValues.Length; field++) { string f = "f" + field; BinaryDocValues bdv = r.GetBinaryDocValues(f); NumericDocValues ndv = r.GetNumericDocValues(f); IBits docsWithField = r.GetDocsWithField(f); if (field < numNDVFields) { Assert.IsNotNull(ndv); Assert.IsNull(bdv); } else { Assert.IsNull(ndv); Assert.IsNotNull(bdv); } int maxDoc = r.MaxDoc; for (int doc = 0; doc < maxDoc; doc++) { if (liveDocs == null || liveDocs.Get(doc)) { // System.out.println("doc=" + (doc + context.DocBase) + " f='" + f + "' value=" + getValue(bdv, doc, scratch)); if (fieldHasValue[field]) { Assert.IsTrue(docsWithField.Get(doc)); if (field < numNDVFields) { Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r); } else { Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r); } } else { Assert.IsFalse(docsWithField.Get(doc)); } } } } } // System.out.println(); } IOUtils.Dispose(writer, reader, dir); }
public static IndexWriter MockIndexWriter(Directory dir, IndexWriterConfig conf, ITestPoint testPoint) { conf.SetInfoStream(new TestPointInfoStream(conf.InfoStream, testPoint)); return new IndexWriter(dir, conf); }
public virtual void TestTonsOfUpdates() { // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM Directory dir = NewDirectory(); Random random = Random; IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc IndexWriter writer = new IndexWriter(dir, conf); // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds) int numDocs = AtLeast(20000); int numBinaryFields = AtLeast(5); int numTerms = TestUtil.NextInt32(random, 10, 100); // terms should affect many docs HashSet <string> updateTerms = new HashSet <string>(); while (updateTerms.Count < numTerms) { updateTerms.Add(TestUtil.RandomSimpleString(random)); } // System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms); // build a large index with many BDV fields and update terms for (int i = 0; i < numDocs; i++) { Document doc = new Document(); int numUpdateTerms = TestUtil.NextInt32(random, 1, numTerms / 10); for (int j = 0; j < numUpdateTerms; j++) { doc.Add(new StringField("upd", RandomPicks.RandomFrom(random, updateTerms), Store.NO)); } for (int j = 0; j < numBinaryFields; j++) { long val = random.Next(); doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val))); doc.Add(new NumericDocValuesField("cf" + j, val * 2)); } writer.AddDocument(doc); } writer.Commit(); // commit so there's something to apply to // set to flush every 2048 bytes (approximately every 12 updates), so we get // many flushes during binary updates writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024); int numUpdates = AtLeast(100); // System.out.println("numUpdates=" + numUpdates); for (int i = 0; i < numUpdates; i++) { int field = random.Next(numBinaryFields); Term updateTerm = new Term("upd", RandomPicks.RandomFrom(random, updateTerms)); long value = random.Next(); writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value)); writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2); } writer.Dispose(); DirectoryReader reader = DirectoryReader.Open(dir); BytesRef scratch = new BytesRef(); foreach (AtomicReaderContext context in reader.Leaves) { for (int i = 0; i < numBinaryFields; i++) { AtomicReader r = context.AtomicReader; BinaryDocValues f = r.GetBinaryDocValues("f" + i); NumericDocValues cf = r.GetNumericDocValues("cf" + i); for (int j = 0; j < r.MaxDoc; j++) { Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j); } } } reader.Dispose(); dir.Dispose(); }
public virtual void Test() { IList <string> postingsList = new List <string>(); int numTerms = AtLeast(300); int maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20); bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal); IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random)); if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1)) { // Otherwise test can take way too long (> 2 hours) //numTerms /= 2; // LUCENENET specific - To keep this under the 1 hour free limit // of Azure DevOps, this was reduced from /2 to /6. numTerms /= 6; } if (Verbose) { Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc); Console.WriteLine("numTerms=" + numTerms); } for (int i = 0; i < numTerms; i++) { string term = Convert.ToString(i, CultureInfo.InvariantCulture); for (int j = 0; j < i; j++) { postingsList.Add(term); } } postingsList.Shuffle(Random); ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList); Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName())); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); int threadCount = TestUtil.NextInt32(Random, 1, 5); if (Verbose) { Console.WriteLine("config: " + iw.IndexWriter.Config); Console.WriteLine("threadCount=" + threadCount); } Field prototype = NewTextField("field", "", Field.Store.NO); FieldType fieldType = new FieldType(prototype.FieldType); if (Random.NextBoolean()) { fieldType.OmitNorms = true; } int options = Random.Next(3); if (options == 0) { fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS; // we dont actually need positions fieldType.StoreTermVectors = true; // but enforce term vectors when we do this so we check SOMETHING } else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field"))) { fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } // else just positions ThreadJob[] threads = new ThreadJob[threadCount]; CountdownEvent startingGun = new CountdownEvent(1); for (int threadID = 0; threadID < threadCount; threadID++) { Random threadRandom = new Random(Random.Next()); Document document = new Document(); Field field = new Field("field", "", fieldType); document.Add(field); threads[threadID] = new ThreadAnonymousClass(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field); threads[threadID].Start(); } startingGun.Signal(); foreach (ThreadJob t in threads) { t.Join(); } iw.ForceMerge(1); DirectoryReader ir = iw.GetReader(); Assert.AreEqual(1, ir.Leaves.Count); AtomicReader air = (AtomicReader)ir.Leaves[0].Reader; Terms terms = air.GetTerms("field"); // numTerms-1 because there cannot be a term 0 with 0 postings: Assert.AreEqual(numTerms - 1, terms.Count); TermsEnum termsEnum = terms.GetEnumerator(); while (termsEnum.MoveNext()) { int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture); Assert.AreEqual(value, termsEnum.TotalTermFreq); // don't really need to check more than this, as CheckIndex // will verify that totalTermFreq == total number of positions seen // from a docsAndPositionsEnum. } ir.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void DoTestNumbers(bool withPayloads) { Directory dir = NewDirectory(); Analyzer analyzer = withPayloads ? (Analyzer) new MockPayloadAnalyzer() : new MockAnalyzer(Random); Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); Iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping RandomIndexWriter w = new RandomIndexWriter(Random, dir, Iwc); FieldType ft = new FieldType(TextField.TYPE_STORED); ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (Random.NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = Random.NextBoolean(); ft.StoreTermVectorPositions = Random.NextBoolean(); } int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(new Field("numbers", English.Int32ToEnglish(i), ft)); doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft)); doc.Add(new StringField("id", "" + i, Field.Store.NO)); w.AddDocument(doc); } IndexReader reader = w.GetReader(); w.Dispose(); string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" }; foreach (string term in terms) { DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term)); int doc; while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { string storedNumbers = reader.Document(doc).Get("numbers"); int freq = dp.Freq; for (int i = 0; i < freq; i++) { dp.NextPosition(); int start = dp.StartOffset; Debug.Assert(start >= 0); int end = dp.EndOffset; Debug.Assert(end >= 0 && end >= start); // check that the offsets correspond to the term in the src text Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term, StringComparison.Ordinal)); if (withPayloads) { // check that we have a payload and it starts with "pos" Assert.IsNotNull(dp.GetPayload()); BytesRef payload = dp.GetPayload(); Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal)); } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer! } } } // check we can skip correctly int numSkippingTests = AtLeast(50); for (int j = 0; j < numSkippingTests; j++) { int num = TestUtil.NextInt32(Random, 100, Math.Min(numDocs - 1, 999)); DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred")); int doc = dp.Advance(num); Assert.AreEqual(num, doc); int freq = dp.Freq; for (int i = 0; i < freq; i++) { string storedNumbers = reader.Document(doc).Get("numbers"); dp.NextPosition(); int start = dp.StartOffset; Debug.Assert(start >= 0); int end = dp.EndOffset; Debug.Assert(end >= 0 && end >= start); // check that the offsets correspond to the term in the src text Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred", StringComparison.Ordinal)); if (withPayloads) { // check that we have a payload and it starts with "pos" Assert.IsNotNull(dp.GetPayload()); BytesRef payload = dp.GetPayload(); Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal)); } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer! } } // check that other fields (without offsets) work correctly for (int i = 0; i < numDocs; i++) { DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0); Assert.AreEqual(i, dp.NextDoc()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); } reader.Dispose(); dir.Dispose(); }
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) : this(input, dir1, dir2, new TermRangeFilter(midTerm.Field, null, midTerm.Bytes, true, false), config1, config2) { }
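A hedged sketch of splitting an index with this constructor (the paths and the mid term are hypothetical, and it assumes the splitter's Split() method performs the copy, as in the Java PKIndexSplitter). Per the range filter above, documents whose key term sorts before the mid term land in dir1 and the rest go to dir2:

// Sketch: split one index into two at the primary key "5000".
// Assumes Lucene.NET 4.8 and PKIndexSplitter from the Lucene.Net.Misc package.
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

var input = FSDirectory.Open(@"C:\FullIndex"); // hypothetical paths
var dir1 = FSDirectory.Open(@"C:\IndexA");
var dir2 = FSDirectory.Open(@"C:\IndexB");
var version = LuceneVersion.LUCENE_48;
// Null analyzers are fine: the splitter only copies existing segments.
var splitter = new PKIndexSplitter(input, dir1, dir2, new Term("id", "5000"),
    new IndexWriterConfig(version, null),
    new IndexWriterConfig(version, null));
splitter.Split(); // keys sorting before "5000" -> dir1, the rest -> dir2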
public override void SetUp() { base.SetUp(); Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); }
public virtual void TestTooManySegments() { Directory dir = GetAssertNoDeletesDirectory(NewDirectory()); // Don't use newIndexWriterConfig, because we need a // "sane" mergePolicy: IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter w = new IndexWriter(dir, iwc); // Create 500 segments: for (int i = 0; i < 500; i++) { Document doc = new Document(); doc.Add(NewStringField("id", "" + i, Field.Store.NO)); w.AddDocument(doc); IndexReader r = DirectoryReader.Open(w, true); // Make sure segment count never exceeds 100: Assert.IsTrue(r.Leaves.Count < 100); r.Dispose(); } w.Dispose(); dir.Dispose(); }
/* * Run one indexer and 2 searchers against a single index as * a stress test. */ public virtual void RunTest(Directory directory) { TimedThread[] threads = new TimedThread[4]; IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMaxBufferedDocs(7); ((TieredMergePolicy)conf.MergePolicy).MaxMergeAtOnce = 3; IndexWriter writer = RandomIndexWriter.MockIndexWriter(directory, conf, Random); // Establish a base index of 100 docs: for (int i = 0; i < 100; i++) { Documents.Document d = new Documents.Document(); d.Add(NewStringField("id", Convert.ToString(i), Field.Store.YES)); d.Add(NewTextField("contents", English.Int32ToEnglish(i), Field.Store.NO)); if ((i - 1) % 7 == 0) { writer.Commit(); } writer.AddDocument(d); } writer.Commit(); IndexReader r = DirectoryReader.Open(directory); Assert.AreEqual(100, r.NumDocs); r.Dispose(); IndexerThread indexerThread = new IndexerThread(writer, threads); threads[0] = indexerThread; indexerThread.Start(); IndexerThread indexerThread2 = new IndexerThread(writer, threads); threads[1] = indexerThread2; indexerThread2.Start(); SearcherThread searcherThread1 = new SearcherThread(directory, threads); threads[2] = searcherThread1; searcherThread1.Start(); SearcherThread searcherThread2 = new SearcherThread(directory, threads); threads[3] = searcherThread2; searcherThread2.Start(); indexerThread.Join(); indexerThread2.Join(); searcherThread1.Join(); searcherThread2.Join(); writer.Dispose(); Assert.IsTrue(!indexerThread.Failed, "hit unexpected exception in indexer"); Assert.IsTrue(!indexerThread2.Failed, "hit unexpected exception in indexer2"); Assert.IsTrue(!searcherThread1.Failed, "hit unexpected exception in search1"); Assert.IsTrue(!searcherThread2.Failed, "hit unexpected exception in search2"); //System.out.println(" Writer: " + indexerThread.count + " iterations"); //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created"); //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created"); }
public override void SetUp() { base.SetUp(); Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); }
public MyIndexWriter(Directory dir, IndexWriterConfig conf) : base(dir, conf) { }
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass public virtual void TestWriteReadMerge() { // get another codec, other than the default: so we are merging segments across different codecs Codec otherCodec; if ("SimpleText".Equals(Codec.Default.Name, StringComparison.Ordinal)) { otherCodec = new Lucene46Codec(); } else { otherCodec = new SimpleTextCodec(); } Directory dir = NewDirectory(); IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30)); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone()); int docCount = AtLeast(200); var data = new byte[docCount][][]; for (int i = 0; i < docCount; ++i) { int fieldCount = Rarely() ? RandomInts.NextIntBetween(Random(), 1, 500) : RandomInts.NextIntBetween(Random(), 1, 5); data[i] = new byte[fieldCount][]; for (int j = 0; j < fieldCount; ++j) { int length = Rarely() ? Random().Next(1000) : Random().Next(10); int max = Rarely() ? 256 : 2; data[i][j] = RandomByteArray(length, max); } } FieldType type = new FieldType(StringField.TYPE_STORED); type.IsIndexed = false; type.Freeze(); Int32Field id = new Int32Field("id", 0, Field.Store.YES); for (int i = 0; i < data.Length; ++i) { Document doc = new Document(); doc.Add(id); id.SetInt32Value(i); for (int j = 0; j < data[i].Length; ++j) { Field f = new Field("bytes" + j, data[i][j], type); doc.Add(f); } iw.w.AddDocument(doc); if (Random().NextBoolean() && (i % (data.Length / 10) == 0)) { iw.w.Dispose(); // test merging against a non-compressing codec if (iwConf.Codec == otherCodec) { iwConf.SetCodec(Codec.Default); } else { iwConf.SetCodec(otherCodec); } iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone()); } } for (int i = 0; i < 10; ++i) { int min = Random().Next(data.Length); int max = min + Random().Next(20); iw.DeleteDocuments(NumericRangeQuery.NewInt32Range("id", min, max, true, false)); } iw.ForceMerge(2); // force merges with deletions iw.Commit(); DirectoryReader ir = DirectoryReader.Open(dir); Assert.IsTrue(ir.NumDocs > 0); int numDocs = 0; for (int i = 0; i < ir.MaxDoc; ++i) { Document doc = ir.Document(i); if (doc == null) { continue; } ++numDocs; int docId = (int)doc.GetField("id").GetInt32Value(); Assert.AreEqual(data[docId].Length + 1, doc.Fields.Count); for (int j = 0; j < data[docId].Length; ++j) { var arr = data[docId][j]; BytesRef arr2Ref = doc.GetBinaryValue("bytes" + j); var arr2 = Arrays.CopyOfRange(arr2Ref.Bytes, arr2Ref.Offset, arr2Ref.Offset + arr2Ref.Length); Assert.AreEqual(arr, arr2); } } Assert.IsTrue(ir.NumDocs <= numDocs); ir.Dispose(); iw.DeleteAll(); iw.Commit(); iw.ForceMerge(1); iw.Dispose(); dir.Dispose(); }
internal readonly Codec Codec; // for writing new segments /// <summary> /// Constructs a new IndexWriter per the settings given in <code>conf</code>. /// If you want to make "live" changes to this writer instance, use /// <seealso cref="#getConfig()"/>. /// /// <p> /// <b>NOTE:</b> after this writer is created, the given configuration instance /// cannot be passed to another writer. If you intend to do so, you should /// <seealso cref="IndexWriterConfig#clone() clone"/> it beforehand. /// </summary> /// <param name="d"> /// the index directory. The index is either created or appended /// according to <code>conf.getOpenMode()</code>. </param> /// <param name="conf"> /// the configuration settings according to which IndexWriter should /// be initialized. </param> /// <exception cref="IOException"> /// if the directory cannot be read/written to, or if it does not /// exist and <code>conf.getOpenMode()</code> is /// <code>OpenMode.APPEND</code> or if there is any other low-level /// IO error </exception> public IndexWriter(Directory d, IndexWriterConfig conf) { readerPool = new ReaderPool(this); conf.SetIndexWriter(this); // prevent reuse by other instances Config_Renamed = new LiveIndexWriterConfig(conf); directory = d; analyzer = Config_Renamed.Analyzer; infoStream = Config_Renamed.InfoStream; mergePolicy = Config_Renamed.MergePolicy; mergePolicy.IndexWriter = this; mergeScheduler = Config_Renamed.MergeScheduler; Codec = Config_Renamed.Codec; BufferedUpdatesStream = new BufferedUpdatesStream(infoStream); PoolReaders = Config_Renamed.ReaderPooling; WriteLock = directory.MakeLock(WRITE_LOCK_NAME); if (!WriteLock.Obtain(Config_Renamed.WriteLockTimeout)) // obtain write lock { throw new LockObtainFailedException("Index locked for write: " + WriteLock); } bool success = false; try { OpenMode_e? mode = Config_Renamed.OpenMode; bool create; if (mode == OpenMode_e.CREATE) { create = true; } else if (mode == OpenMode_e.APPEND) { create = false; } else { // CREATE_OR_APPEND - create only if an index does not exist create = !DirectoryReader.IndexExists(directory); } // If index is too old, reading the segments will throw // IndexFormatTooOldException. segmentInfos = new SegmentInfos(); bool initialIndexExists = true; if (create) { // Try to read first. This is to allow create // against an index that's currently open for // searching. In this case we write the next // segments_N file with no segments: try { segmentInfos.Read(directory); segmentInfos.Clear(); } catch (IOException) { // Likely this means it's a fresh directory initialIndexExists = false; } // Record that we have a change (zero out all // segments) pending: Changed(); } else { segmentInfos.Read(directory); IndexCommit commit = Config_Renamed.IndexCommit; if (commit != null) { // Swap out all segments, but, keep metadata in // SegmentInfos, like version & generation, to // preserve write-once. This is important if // readers are open against the future commit // points. 
if (commit.Directory != directory) { throw new System.ArgumentException("IndexCommit's directory doesn't match my directory"); } SegmentInfos oldInfos = new SegmentInfos(); oldInfos.Read(directory, commit.SegmentsFileName); segmentInfos.Replace(oldInfos); Changed(); if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "init: loaded commit \"" + commit.SegmentsFileName + "\""); } } } RollbackSegments = segmentInfos.CreateBackupSegmentInfos(); // start with previous field numbers, but new FieldInfos GlobalFieldNumberMap = FieldNumberMap; Config_Renamed.FlushPolicy.Init(Config_Renamed); DocWriter = new DocumentsWriter(this, Config_Renamed, directory); eventQueue = DocWriter.EventQueue(); // Default deleter (for backwards compatibility) is // KeepOnlyLastCommitDeleter: lock (this) { Deleter = new IndexFileDeleter(directory, Config_Renamed.DelPolicy, segmentInfos, infoStream, this, initialIndexExists); } if (Deleter.StartingCommitDeleted) { // Deletion policy deleted the "head" commit point. // We have to mark ourself as changed so that if we // are closed w/o any further changes we write a new // segments_N file. Changed(); } if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "init: create=" + create); MessageState(); } success = true; } finally { if (!success) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "init: hit exception on init; releasing write lock"); } WriteLock.Release(); IOUtils.CloseWhileHandlingException(WriteLock); WriteLock = null; } } }
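Because the constructor above marks its config as in use ("prevent reuse by other instances"), a config you intend to share across writers should be cloned first, per the NOTE in the doc comment. A minimal sketch (the version, analyzer, and paths are illustrative, and reusing 'conf' itself for a second writer is expected to fail):

// Sketch: give each IndexWriter its own private copy of the configuration.
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

var version = LuceneVersion.LUCENE_48;
var conf = new IndexWriterConfig(version, new StandardAnalyzer(version));
// Clone() returns object in this API, hence the cast (as in the tests above).
using (var w1 = new IndexWriter(FSDirectory.Open(@"C:\Index1"), (IndexWriterConfig)conf.Clone()))
using (var w2 = new IndexWriter(FSDirectory.Open(@"C:\Index2"), (IndexWriterConfig)conf.Clone()))
{
    // ... index documents with w1 and w2 ...
}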
public virtual void TestMaxMergeCount() { Directory dir = NewDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); int maxMergeCount = TestUtil.NextInt(Random(), 1, 5); int maxMergeThreads = TestUtil.NextInt(Random(), 1, maxMergeCount); CountdownEvent enoughMergesWaiting = new CountdownEvent(maxMergeCount); AtomicInteger runningMergeCount = new AtomicInteger(0); AtomicBoolean failed = new AtomicBoolean(); if (VERBOSE) { Console.WriteLine("TEST: maxMergeCount=" + maxMergeCount + " maxMergeThreads=" + maxMergeThreads); } ConcurrentMergeScheduler cms = new ConcurrentMergeSchedulerAnonymousInnerClassHelper(this, maxMergeCount, enoughMergesWaiting, runningMergeCount, failed); cms.SetMaxMergesAndThreads(maxMergeCount, maxMergeThreads); iwc.SetMergeScheduler(cms); iwc.SetMaxBufferedDocs(2); TieredMergePolicy tmp = new TieredMergePolicy(); iwc.SetMergePolicy(tmp); tmp.MaxMergeAtOnce = 2; tmp.SegmentsPerTier = 2; IndexWriter w = new IndexWriter(dir, iwc); Document doc = new Document(); doc.Add(NewField("field", "field", TextField.TYPE_NOT_STORED)); while (enoughMergesWaiting.CurrentCount != 0 && !failed.Get()) { for (int i = 0; i < 10; i++) { w.AddDocument(doc); } } w.Dispose(false); dir.Dispose(); }
private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf) { conf.SetMergePolicy(new LogDocMergePolicy()); IndexWriter writer = new IndexWriter(dir, conf); return writer; }
/// <summary> /// create a new index writer config with random defaults using the specified random </summary> public static IndexWriterConfig NewIndexWriterConfig(Random r, LuceneVersion v, Analyzer a) { IndexWriterConfig c = new IndexWriterConfig(v, a); c.SetSimilarity(ClassEnvRule.Similarity); if (VERBOSE) { // Even though TestRuleSetupAndRestoreClassEnv calls // InfoStream.setDefault, we do it again here so that // the PrintStreamInfoStream.messageID increments so // that when there are separate instances of // IndexWriter created we see "IW 0", "IW 1", "IW 2", // ... instead of just always "IW 0": c.InfoStream = new TestRuleSetupAndRestoreClassEnv.ThreadNameFixingPrintStreamInfoStream(Console.Out); } if (r.NextBoolean()) { c.SetMergeScheduler(new SerialMergeScheduler()); } else if (Rarely(r)) { int maxThreadCount = TestUtil.NextInt(Random(), 1, 4); int maxMergeCount = TestUtil.NextInt(Random(), maxThreadCount, maxThreadCount + 4); ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); cms.SetMaxMergesAndThreads(maxMergeCount, maxThreadCount); c.SetMergeScheduler(cms); } if (r.NextBoolean()) { if (Rarely(r)) { // crazy value c.SetMaxBufferedDocs(TestUtil.NextInt(r, 2, 15)); } else { // reasonable value c.SetMaxBufferedDocs(TestUtil.NextInt(r, 16, 1000)); } } if (r.NextBoolean()) { if (Rarely(r)) { // crazy value c.SetTermIndexInterval(r.NextBoolean() ? TestUtil.NextInt(r, 1, 31) : TestUtil.NextInt(r, 129, 1000)); } else { // reasonable value c.SetTermIndexInterval(TestUtil.NextInt(r, 32, 128)); } } if (r.NextBoolean()) { int maxNumThreadStates = Rarely(r) ? TestUtil.NextInt(r, 5, 20) : TestUtil.NextInt(r, 1, 4); // reasonable value - crazy value if (Rarely(r)) { // Retrieve the package-private setIndexerThreadPool // method: MethodInfo setIndexerThreadPoolMethod = typeof(IndexWriterConfig).GetMethod("SetIndexerThreadPool", new Type[] { typeof(DocumentsWriterPerThreadPool) }); //setIndexerThreadPoolMethod.setAccessible(true); Type clazz = typeof(RandomDocumentsWriterPerThreadPool); ConstructorInfo ctor = clazz.GetConstructor(new[] { typeof(int), typeof(Random) }); //ctor.Accessible = true; // random thread pool setIndexerThreadPoolMethod.Invoke(c, new[] { ctor.Invoke(new object[] { maxNumThreadStates, r }) }); } else { // random thread pool c.SetMaxThreadStates(maxNumThreadStates); } } c.SetMergePolicy(NewMergePolicy(r)); if (Rarely(r)) { c.SetMergedSegmentWarmer(new SimpleMergedSegmentWarmer(c.InfoStream)); } c.SetUseCompoundFile(r.NextBoolean()); c.SetReaderPooling(r.NextBoolean()); c.SetReaderTermsIndexDivisor(TestUtil.NextInt(r, 1, 4)); c.SetCheckIntegrityAtMerge(r.NextBoolean()); return c; }
public virtual void TestDeletesCheckIndexOutput() { Directory dir = NewDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetMaxBufferedDocs(2); IndexWriter w = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone()); Document doc = new Document(); doc.Add(NewField("field", "0", StringField.TYPE_NOT_STORED)); w.AddDocument(doc); doc = new Document(); doc.Add(NewField("field", "1", StringField.TYPE_NOT_STORED)); w.AddDocument(doc); w.Commit(); Assert.AreEqual(1, w.SegmentCount); w.DeleteDocuments(new Term("field", "0")); w.Commit(); Assert.AreEqual(1, w.SegmentCount); w.Dispose(); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); //MemoryStream bos = new MemoryStream(1024); CheckIndex checker = new CheckIndex(dir); checker.InfoStream = new StreamWriter(bos, Encoding.UTF8); CheckIndex.Status indexStatus = checker.DoCheckIndex(null); Assert.IsTrue(indexStatus.Clean); checker.FlushInfoStream(); string s = bos.ToString(); // Segment should have deletions: Assert.IsTrue(s.Contains("has deletions"), "string was: " + s); w = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone()); w.ForceMerge(1); w.Dispose(); bos = new ByteArrayOutputStream(1024); checker.InfoStream = new StreamWriter(bos, Encoding.UTF8); indexStatus = checker.DoCheckIndex(null); Assert.IsTrue(indexStatus.Clean); checker.FlushInfoStream(); s = bos.ToString(); Assert.IsFalse(s.Contains("has deletions")); dir.Dispose(); }
public virtual void TestLiveChangeToCFS() { Directory dir = NewDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetMergePolicy(NewLogMergePolicy(true)); // Start false: iwc.SetUseCompoundFile(false); iwc.MergePolicy.NoCFSRatio = 0.0d; IndexWriter w = new IndexWriter(dir, iwc); // Change to true: w.Config.SetUseCompoundFile(true); Document doc = new Document(); doc.Add(NewStringField("field", "foo", Store.NO)); w.AddDocument(doc); w.Commit(); Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after commit"); doc.Add(NewStringField("field", "foo", Store.NO)); w.AddDocument(doc); w.Commit(); w.ForceMerge(1); w.Commit(); // no compound files after merge Assert.IsFalse(w.NewestSegment().Info.UseCompoundFile, "Expected Non-CFS after merge"); MergePolicy lmp = w.Config.MergePolicy; lmp.NoCFSRatio = 1.0; lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity; w.AddDocument(doc); w.ForceMerge(1); w.Commit(); Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after merge"); w.Dispose(); dir.Dispose(); }
public virtual void Test() { IList <string> postingsList = new List <string>(); int numTerms = AtLeast(300); int maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20); bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field")); IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random())); if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1)) { // Otherwise test can take way too long (> 2 hours) numTerms /= 2; } if (VERBOSE) { Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc); Console.WriteLine("numTerms=" + numTerms); } for (int i = 0; i < numTerms; i++) { string term = Convert.ToString(i); for (int j = 0; j < i; j++) { postingsList.Add(term); } } Collections.Shuffle(postingsList); ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList); Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName())); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); int threadCount = TestUtil.NextInt(Random(), 1, 5); if (VERBOSE) { Console.WriteLine("config: " + iw.w.Config); Console.WriteLine("threadCount=" + threadCount); } Field prototype = NewTextField("field", "", Field.Store.NO); FieldType fieldType = new FieldType(prototype.FieldType); if (Random().NextBoolean()) { fieldType.OmitNorms = true; } int options = Random().Next(3); if (options == 0) { fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS; // we dont actually need positions fieldType.StoreTermVectors = true; // but enforce term vectors when we do this so we check SOMETHING } else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field"))) { fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } // else just positions ThreadClass[] threads = new ThreadClass[threadCount]; CountdownEvent startingGun = new CountdownEvent(1); for (int threadID = 0; threadID < threadCount; threadID++) { Random threadRandom = new Random(Random().Next()); Document document = new Document(); Field field = new Field("field", "", fieldType); document.Add(field); threads[threadID] = new ThreadAnonymousInnerClassHelper(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field); threads[threadID].Start(); } startingGun.Signal(); foreach (ThreadClass t in threads) { t.Join(); } iw.ForceMerge(1); DirectoryReader ir = iw.Reader; Assert.AreEqual(1, ir.Leaves.Count); AtomicReader air = (AtomicReader)ir.Leaves[0].Reader; Terms terms = air.GetTerms("field"); // numTerms-1 because there cannot be a term 0 with 0 postings: Assert.AreEqual(numTerms - 1, terms.Count); TermsEnum termsEnum = terms.GetIterator(null); BytesRef termBR; while ((termBR = termsEnum.Next()) != null) { int value = Convert.ToInt32(termBR.Utf8ToString()); Assert.AreEqual(value, termsEnum.TotalTermFreq); // don't really need to check more than this, as CheckIndex // will verify that totalTermFreq == total number of positions seen // from a docsAndPositionsEnum. } ir.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void TestDocsWithField() { AssumeTrue("codec does not support docsWithField", DefaultCodecSupportsDocsWithField); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); if (Random.Next(4) >= 0) { doc.Add(new NumericDocValuesField("numbers", Random.NextInt64())); } doc.Add(new NumericDocValuesField("numbersAlways", Random.NextInt64())); iw.AddDocument(doc); if (Random.Next(17) == 0) { iw.Commit(); } } DirectoryReader ir = iw.GetReader(); iw.ForceMerge(1); DirectoryReader ir2 = iw.GetReader(); AtomicReader merged = GetOnlySegmentReader(ir2); iw.Dispose(); IBits multi = MultiDocValues.GetDocsWithField(ir, "numbers"); IBits single = merged.GetDocsWithField("numbers"); if (multi == null) { Assert.IsNull(single); } else { Assert.AreEqual(single.Length, multi.Length); for (int i = 0; i < numDocs; i++) { Assert.AreEqual(single.Get(i), multi.Get(i)); } } multi = MultiDocValues.GetDocsWithField(ir, "numbersAlways"); single = merged.GetDocsWithField("numbersAlways"); Assert.AreEqual(single.Length, multi.Length); for (int i = 0; i < numDocs; i++) { Assert.AreEqual(single.Get(i), multi.Get(i)); } ir.Dispose(); ir2.Dispose(); dir.Dispose(); }
public virtual void TestVariableBinary([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BVariableBinary")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(scheduler) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE); IndexWriter w = new IndexWriter(dir, config); Document doc = new Document(); var bytes = new byte[4]; ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes); BytesRef data = new BytesRef(bytes); BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data); doc.Add(dvField); for (int i = 0; i < int.MaxValue; i++) { encoder.Reset(bytes); encoder.WriteVInt(i % 65535); // 1, 2, or 3 bytes data.Length = encoder.Position; w.AddDocument(doc); if (i % 100000 == 0) { Console.WriteLine("indexed: " + i); Console.Out.Flush(); } } w.ForceMerge(1); w.Dispose(); Console.WriteLine("verifying..."); Console.Out.Flush(); DirectoryReader r = DirectoryReader.Open(dir); int expectedValue = 0; ByteArrayDataInput input = new ByteArrayDataInput(); foreach (AtomicReaderContext context in r.Leaves) { AtomicReader reader = context.AtomicReader; BytesRef scratch = new BytesRef(bytes); BinaryDocValues dv = reader.GetBinaryDocValues("dv"); for (int i = 0; i < reader.MaxDoc; i++) { dv.Get(i, scratch); input.Reset((byte[])(Array)scratch.Bytes, scratch.Offset, scratch.Length); Assert.AreEqual(expectedValue % 65535, input.ReadVInt()); Assert.IsTrue(input.Eof()); expectedValue++; } } r.Dispose(); dir.Dispose(); }
public virtual void TestRandom() { int numThreads = 1 + Random.Next(8); int numDocumentsToIndex = 50 + AtLeast(70); AtomicInt32 numDocs = new AtomicInt32(numDocumentsToIndex); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); iwc.SetFlushPolicy(flushPolicy); int numDWPT = 1 + Random.Next(8); DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT); iwc.SetIndexerThreadPool(threadPool); IndexWriter writer = new IndexWriter(dir, iwc); flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy; DocumentsWriter docsWriter = writer.DocsWriter; Assert.IsNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.flushControl; Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init"); IndexThread[] threads = new IndexThread[numThreads]; for (int x = 0; x < threads.Length; x++) { threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, true); threads[x].Start(); } for (int x = 0; x < threads.Length; x++) { threads[x].Join(); } Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due"); Assert.AreEqual(numDocumentsToIndex, writer.NumDocs); Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc); if (flushPolicy.FlushOnRAM && !flushPolicy.FlushOnDocCount && !flushPolicy.FlushOnDeleteTerms) { long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0); Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark"); if (flushPolicy.HasMarkedPending) { assertTrue("max: " + maxRAMBytes + " " + flushControl.peakActiveBytes, maxRAMBytes <= flushControl.peakActiveBytes); } } AssertActiveBytesAfter(flushControl); writer.Commit(); Assert.AreEqual(0, flushControl.ActiveBytes); IndexReader r = DirectoryReader.Open(dir); Assert.AreEqual(numDocumentsToIndex, r.NumDocs); Assert.AreEqual(numDocumentsToIndex, r.MaxDoc); if (!flushPolicy.FlushOnRAM) { assertFalse("never stall if we don't flush on RAM", docsWriter.flushControl.stallControl.WasStalled); assertFalse("never block if we don't flush on RAM", docsWriter.flushControl.stallControl.HasBlocked); } r.Dispose(); writer.Dispose(); dir.Dispose(); }
public virtual void TestInvalidValues() { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); // Test IndexDeletionPolicy Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.IndexDeletionPolicy.GetType()); conf.SetIndexDeletionPolicy(new SnapshotDeletionPolicy(null)); Assert.AreEqual(typeof(SnapshotDeletionPolicy), conf.IndexDeletionPolicy.GetType()); try { conf.SetIndexDeletionPolicy(null); Assert.Fail(); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // ok } // Test MergeScheduler #if FEATURE_TASKMERGESCHEDULER Assert.AreEqual(typeof(TaskMergeScheduler), conf.MergeScheduler.GetType()); #else Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType()); #endif conf.SetMergeScheduler(new SerialMergeScheduler()); Assert.AreEqual(typeof(SerialMergeScheduler), conf.MergeScheduler.GetType()); try { conf.SetMergeScheduler(null); Assert.Fail(); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // ok } // Test Similarity: // we shouldnt assert what the default is, just that its not null. Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity); conf.SetSimilarity(new MySimilarity()); Assert.AreEqual(typeof(MySimilarity), conf.Similarity.GetType()); try { conf.SetSimilarity(null); Assert.Fail(); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // ok } // Test IndexingChain Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain); conf.SetIndexingChain(new MyIndexingChain()); Assert.AreEqual(typeof(MyIndexingChain), conf.IndexingChain.GetType()); try { conf.SetIndexingChain(null); Assert.Fail(); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // ok } try { conf.SetMaxBufferedDeleteTerms(0); Assert.Fail("should not have succeeded to set maxBufferedDeleteTerms to 0"); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // this is expected } try { conf.SetMaxBufferedDocs(1); Assert.Fail("should not have succeeded to set maxBufferedDocs to 1"); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // this is expected } try { // Disable both MAX_BUF_DOCS and RAM_SIZE_MB conf.SetMaxBufferedDocs(4); conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); Assert.Fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well"); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // this is expected } conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); conf.SetMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS); try { conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); Assert.Fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well"); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // this is expected } // Test setReaderTermsIndexDivisor try { conf.SetReaderTermsIndexDivisor(0); Assert.Fail("should not have succeeded to set termsIndexDivisor to 0"); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // this is expected } // Setting to -1 is ok conf.SetReaderTermsIndexDivisor(-1); try { 
conf.SetReaderTermsIndexDivisor(-2); Assert.Fail("should not have succeeded to set termsIndexDivisor to < -1"); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // this is expected } try { conf.SetRAMPerThreadHardLimitMB(2048); Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048"); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // this is expected } try { conf.SetRAMPerThreadHardLimitMB(0); Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0"); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // this is expected } // Test MergePolicy Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType()); conf.SetMergePolicy(new LogDocMergePolicy()); Assert.AreEqual(typeof(LogDocMergePolicy), conf.MergePolicy.GetType()); try { conf.SetMergePolicy(null); Assert.Fail(); } #pragma warning disable 168 catch (System.ArgumentException e) #pragma warning restore 168 { // ok } }
// [Test, LongRunningTest, Timeout(int.MaxValue)] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass public virtual void TestBigDocuments() { // "big" as "much bigger than the chunk size" // for this test we force a FS dir // we can't just use newFSDirectory, because this test doesn't really index anything. // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484) Directory dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments"))); IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30)); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } Document emptyDoc = new Document(); // emptyDoc Document bigDoc1 = new Document(); // lot of small fields Document bigDoc2 = new Document(); // 1 very big field Field idField = new StringField("id", "", Field.Store.NO); emptyDoc.Add(idField); bigDoc1.Add(idField); bigDoc2.Add(idField); FieldType onlyStored = new FieldType(StringField.TYPE_STORED); onlyStored.IsIndexed = false; Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored); int numFields = RandomInts.NextIntBetween(Random(), 500000, 1000000); for (int i = 0; i < numFields; ++i) { bigDoc1.Add(smallField); } Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored); bigDoc2.Add(bigField); int numDocs = AtLeast(5); Document[] docs = new Document[numDocs]; for (int i = 0; i < numDocs; ++i) { docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2)); } for (int i = 0; i < numDocs; ++i) { idField.SetStringValue("" + i); iw.AddDocument(docs[i]); if (Random().Next(numDocs) == 0) { iw.Commit(); } } iw.Commit(); iw.ForceMerge(1); // look at what happens when big docs are merged DirectoryReader rd = DirectoryReader.Open(dir); IndexSearcher searcher = new IndexSearcher(rd); for (int i = 0; i < numDocs; ++i) { Query query = new TermQuery(new Term("id", "" + i)); TopDocs topDocs = searcher.Search(query, 1); Assert.AreEqual(1, topDocs.TotalHits, "" + i); Document doc = rd.Document(topDocs.ScoreDocs[0].Doc); Assert.IsNotNull(doc); IIndexableField[] fieldValues = doc.GetFields("fld"); Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length); if (fieldValues.Length > 0) { Assert.AreEqual(docs[i].GetFields("fld")[0].GetBinaryValue(), fieldValues[0].GetBinaryValue()); } } rd.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(scheduler) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE); IndexWriter w = new IndexWriter(dir, config); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; ft.OmitNorms = true; MyTokenStream tokenStream = new MyTokenStream(); Field field = new Field("field", tokenStream, ft); doc.Add(field); const int numDocs = 1000; for (int i = 0; i < numDocs; i++) { if (i % 2 == 1) // trick blockPF's little optimization { tokenStream.n = 65536; } else { tokenStream.n = 65537; } w.AddDocument(doc); } w.ForceMerge(1); w.Dispose(); DirectoryReader oneThousand = DirectoryReader.Open(dir); IndexReader[] subReaders = new IndexReader[1000]; Arrays.Fill(subReaders, oneThousand); MultiReader mr = new MultiReader(subReaders); BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2")); if (dir2 is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir2).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w2.AddIndexes(mr); w2.ForceMerge(1); w2.Dispose(); oneThousand.Dispose(); DirectoryReader oneMillion = DirectoryReader.Open(dir2); subReaders = new IndexReader[2000]; Arrays.Fill(subReaders, oneMillion); mr = new MultiReader(subReaders); BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3")); if (dir3 is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir3).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w3.AddIndexes(mr); w3.ForceMerge(1); w3.Dispose(); oneMillion.Dispose(); dir.Dispose(); dir2.Dispose(); dir3.Dispose(); }
public virtual void TestDefaults() { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); Assert.AreEqual(typeof(MockAnalyzer), conf.Analyzer.GetType()); Assert.IsNull(conf.IndexCommit); Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.IndexDeletionPolicy.GetType()); #if FEATURE_TASKMERGESCHEDULER Assert.AreEqual(typeof(TaskMergeScheduler), conf.MergeScheduler.GetType()); #else Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType()); #endif Assert.AreEqual(OpenMode.CREATE_OR_APPEND, conf.OpenMode); // we don't need to assert this, it should be unspecified Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity); Assert.AreEqual(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.TermIndexInterval); Assert.AreEqual(IndexWriterConfig.DefaultWriteLockTimeout, conf.WriteLockTimeout); Assert.AreEqual(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.DefaultWriteLockTimeout); Assert.AreEqual(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS, conf.MaxBufferedDeleteTerms); Assert.AreEqual(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, conf.RAMBufferSizeMB, 0.0); Assert.AreEqual(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS, conf.MaxBufferedDocs); Assert.AreEqual(IndexWriterConfig.DEFAULT_READER_POOLING, conf.UseReaderPooling); Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain); Assert.IsNull(conf.MergedSegmentWarmer); Assert.AreEqual(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.ReaderTermsIndexDivisor); Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType()); Assert.AreEqual(typeof(ThreadAffinityDocumentsWriterThreadPool), conf.IndexerThreadPool.GetType()); Assert.AreEqual(typeof(FlushByRamOrCountsPolicy), conf.FlushPolicy.GetType()); Assert.AreEqual(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.RAMPerThreadHardLimitMB); Assert.AreEqual(Codec.Default, conf.Codec); Assert.AreEqual(InfoStream.Default, conf.InfoStream); Assert.AreEqual(IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM, conf.UseCompoundFile); // Sanity check - validate that all getters are covered. HashSet <string> getters = new HashSet <string>(); getters.Add("getAnalyzer"); getters.Add("getIndexCommit"); getters.Add("getIndexDeletionPolicy"); getters.Add("getMaxFieldLength"); getters.Add("getMergeScheduler"); getters.Add("getOpenMode"); getters.Add("getSimilarity"); getters.Add("getTermIndexInterval"); getters.Add("getWriteLockTimeout"); getters.Add("getDefaultWriteLockTimeout"); getters.Add("getMaxBufferedDeleteTerms"); getters.Add("getRAMBufferSizeMB"); getters.Add("getMaxBufferedDocs"); getters.Add("getIndexingChain"); getters.Add("getMergedSegmentWarmer"); getters.Add("getMergePolicy"); getters.Add("getMaxThreadStates"); getters.Add("getReaderPooling"); getters.Add("getIndexerThreadPool"); getters.Add("getReaderTermsIndexDivisor"); getters.Add("getFlushPolicy"); getters.Add("getRAMPerThreadHardLimitMB"); getters.Add("getCodec"); getters.Add("getInfoStream"); getters.Add("getUseCompoundFile"); foreach (MethodInfo m in typeof(IndexWriterConfig).GetMethods()) { if (m.DeclaringType == typeof(IndexWriterConfig) && m.Name.StartsWith("get", StringComparison.Ordinal) && !m.Name.StartsWith("get_", StringComparison.Ordinal)) { Assert.IsTrue(getters.Contains(m.Name), "method " + m.Name + " is not tested for defaults"); } } }
public virtual void TestInvalidValues()
{
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

    // Test IndexDeletionPolicy
    Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.DelPolicy.GetType());
    conf.SetIndexDeletionPolicy(new SnapshotDeletionPolicy(null));
    Assert.AreEqual(typeof(SnapshotDeletionPolicy), conf.DelPolicy.GetType());
    try { conf.SetIndexDeletionPolicy(null); Assert.Fail(); }
    catch (System.ArgumentException) { /* ok */ }

    // Test MergeScheduler
    Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
    conf.SetMergeScheduler(new SerialMergeScheduler());
    Assert.AreEqual(typeof(SerialMergeScheduler), conf.MergeScheduler.GetType());
    try { conf.SetMergeScheduler(null); Assert.Fail(); }
    catch (System.ArgumentException) { /* ok */ }

    // Test Similarity:
    // we shouldn't assert what the default is, just that it's not null.
    Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
    conf.SetSimilarity(new MySimilarity());
    Assert.AreEqual(typeof(MySimilarity), conf.Similarity.GetType());
    try { conf.SetSimilarity(null); Assert.Fail(); }
    catch (System.ArgumentException) { /* ok */ }

    // Test IndexingChain
    Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
    conf.SetIndexingChain(new MyIndexingChain());
    Assert.AreEqual(typeof(MyIndexingChain), conf.IndexingChain.GetType());
    try { conf.SetIndexingChain(null); Assert.Fail(); }
    catch (System.ArgumentException) { /* ok */ }

    try
    {
        conf.SetMaxBufferedDeleteTerms(0);
        Assert.Fail("should not have succeeded to set maxBufferedDeleteTerms to 0");
    }
    catch (System.ArgumentException) { /* this is expected */ }

    try
    {
        conf.SetMaxBufferedDocs(1);
        Assert.Fail("should not have succeeded to set maxBufferedDocs to 1");
    }
    catch (System.ArgumentException) { /* this is expected */ }

    try
    {
        // Disable both MAX_BUF_DOCS and RAM_SIZE_MB
        conf.SetMaxBufferedDocs(4);
        conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        Assert.Fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well");
    }
    catch (System.ArgumentException) { /* this is expected */ }

    conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    conf.SetMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
    try
    {
        conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        Assert.Fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well");
    }
    catch (System.ArgumentException) { /* this is expected */ }

    // Test setReaderTermsIndexDivisor
    try
    {
        conf.SetReaderTermsIndexDivisor(0);
        Assert.Fail("should not have succeeded to set termsIndexDivisor to 0");
    }
    catch (System.ArgumentException) { /* this is expected */ }

    // Setting to -1 is ok
    conf.SetReaderTermsIndexDivisor(-1);
    try
    {
        conf.SetReaderTermsIndexDivisor(-2);
        Assert.Fail("should not have succeeded to set termsIndexDivisor to < -1");
    }
    catch (System.ArgumentException) { /* this is expected */ }

    try
    {
        conf.SetRAMPerThreadHardLimitMB(2048);
        Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048");
    }
    catch (System.ArgumentException) { /* this is expected */ }

    try
    {
        conf.SetRAMPerThreadHardLimitMB(0);
        Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0");
    }
    catch (System.ArgumentException) { /* this is expected */ }

    // Test MergePolicy
    Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
    conf.SetMergePolicy(new LogDocMergePolicy());
    Assert.AreEqual(typeof(LogDocMergePolicy), conf.MergePolicy.GetType());
    try { conf.SetMergePolicy(null); Assert.Fail(); }
    catch (System.ArgumentException) { /* ok */ }
}
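// Editor's note (equivalent form; assumes this suite's NUnit version provides
// Assert.Throws, which it may not): each try { ...; Assert.Fail(); } catch guard
// above can collapse to a single assertion, e.g.:
private static void ExampleAssertThrowsForm(IndexWriterConfig conf)
{
    Assert.Throws<System.ArgumentException>(() => conf.SetIndexDeletionPolicy(null));
    Assert.Throws<System.ArgumentException>(() => conf.SetMergeScheduler(null));
    Assert.Throws<System.ArgumentException>(() => conf.SetRAMPerThreadHardLimitMB(2048));
}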
public virtual void TestVariableBinary([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func<IConcurrentMergeScheduler> newScheduler)
{
    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BVariableBinary"));
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
    }

    var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
        .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
        .SetRAMBufferSizeMB(256.0)
        .SetMergeScheduler(newScheduler())
        .SetMergePolicy(NewLogMergePolicy(false, 10))
        .SetOpenMode(OpenMode.CREATE);
    IndexWriter w = new IndexWriter(dir, config);

    Document doc = new Document();
    var bytes = new byte[4];
    ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes);
    BytesRef data = new BytesRef(bytes);
    BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
    doc.Add(dvField);

    for (int i = 0; i < int.MaxValue; i++)
    {
        encoder.Reset(bytes);
        encoder.WriteVInt32(i % 65535); // 1, 2, or 3 bytes
        data.Length = encoder.Position;
        w.AddDocument(doc);
        if (i % 100000 == 0)
        {
            Console.WriteLine("indexed: " + i);
            Console.Out.Flush();
        }
    }

    w.ForceMerge(1);
    w.Dispose();

    Console.WriteLine("verifying...");
    Console.Out.Flush();
    DirectoryReader r = DirectoryReader.Open(dir);
    int expectedValue = 0;
    ByteArrayDataInput input = new ByteArrayDataInput();
    foreach (AtomicReaderContext context in r.Leaves)
    {
        AtomicReader reader = context.AtomicReader;
        BytesRef scratch = new BytesRef(bytes);
        BinaryDocValues dv = reader.GetBinaryDocValues("dv");
        for (int i = 0; i < reader.MaxDoc; i++)
        {
            dv.Get(i, scratch);
            input.Reset(scratch.Bytes, scratch.Offset, scratch.Length);
            Assert.AreEqual(expectedValue % 65535, input.ReadVInt32());
            Assert.IsTrue(input.Eof);
            expectedValue++;
        }
    }

    r.Dispose();
    dir.Dispose();
}
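// Why "1, 2, or 3 bytes" above: a Lucene VInt stores 7 payload bits per byte, so
// values 0..127 take one byte, 128..16383 two, and 16384..65534 three. A minimal
// round trip with the same ByteArrayDataOutput/ByteArrayDataInput pair the test
// uses (helper name is illustrative):
private static void ExampleVIntRoundTrip()
{
    var buf = new byte[4];
    var output = new ByteArrayDataOutput(buf);
    output.WriteVInt32(16384);            // >= 2^14, so this takes 3 bytes
    var input = new ByteArrayDataInput();
    input.Reset(buf, 0, output.Position); // output.Position == 3
    Assert.AreEqual(16384, input.ReadVInt32());
    Assert.IsTrue(input.Eof);             // exactly the bytes written were consumed
}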
public virtual void TestDefaults()
{
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    Assert.AreEqual(typeof(MockAnalyzer), conf.Analyzer.GetType());
    Assert.IsNull(conf.IndexCommit);
    Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.DelPolicy.GetType());
    Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
    Assert.AreEqual(OpenMode.CREATE_OR_APPEND, conf.OpenMode);
    // we don't need to assert this, it should be unspecified
    Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
    Assert.AreEqual(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.TermIndexInterval);
    Assert.AreEqual(IndexWriterConfig.DefaultWriteLockTimeout, conf.WriteLockTimeout);
    Assert.AreEqual(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.DefaultWriteLockTimeout);
    Assert.AreEqual(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS, conf.MaxBufferedDeleteTerms);
    Assert.AreEqual(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, conf.RAMBufferSizeMB, 0.0);
    Assert.AreEqual(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS, conf.MaxBufferedDocs);
    Assert.AreEqual(IndexWriterConfig.DEFAULT_READER_POOLING, conf.UseReaderPooling);
    Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
    Assert.IsNull(conf.MergedSegmentWarmer);
    Assert.AreEqual(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.ReaderTermsIndexDivisor);
    Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
    Assert.AreEqual(typeof(ThreadAffinityDocumentsWriterThreadPool), conf.IndexerThreadPool.GetType());
    Assert.AreEqual(typeof(FlushByRamOrCountsPolicy), conf.FlushPolicy.GetType());
    Assert.AreEqual(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.RAMPerThreadHardLimitMB);
    Assert.AreEqual(Codec.Default, conf.Codec);
    Assert.AreEqual(InfoStream.Default, conf.InfoStream);
    Assert.AreEqual(IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM, conf.UseCompoundFile);

    // Sanity check - validate that all getters are covered.
    HashSet<string> getters = new HashSet<string>
    {
        "getAnalyzer", "getIndexCommit", "getIndexDeletionPolicy", "getMaxFieldLength",
        "getMergeScheduler", "getOpenMode", "getSimilarity", "getTermIndexInterval",
        "getWriteLockTimeout", "getDefaultWriteLockTimeout", "getMaxBufferedDeleteTerms",
        "getRAMBufferSizeMB", "getMaxBufferedDocs", "getIndexingChain", "getMergedSegmentWarmer",
        "getMergePolicy", "getMaxThreadStates", "getReaderPooling", "getIndexerThreadPool",
        "getReaderTermsIndexDivisor", "getFlushPolicy", "getRAMPerThreadHardLimitMB",
        "getCodec", "getInfoStream", "getUseCompoundFile"
    };
    foreach (MethodInfo m in typeof(IndexWriterConfig).GetMethods())
    {
        // Ordinal comparison: reflection-name checks must not be culture-sensitive.
        if (m.DeclaringType == typeof(IndexWriterConfig)
            && m.Name.StartsWith("get", StringComparison.Ordinal)
            && !m.Name.StartsWith("get_", StringComparison.Ordinal))
        {
            Assert.IsTrue(getters.Contains(m.Name), "method " + m.Name + " is not tested for defaults");
        }
    }
}
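// Editor's note on the StringComparison.Ordinal usage above: a minimal illustration
// of the classic Turkish dotless-i pitfall that culture-sensitive prefix checks hit
// (assumes the tr-TR culture is available on the host running the test):
private static void ExampleOrdinalVersusCulture()
{
    var tr = new System.Globalization.CultureInfo("tr-TR");
    // Under tr-TR, 'I' case-folds to dotless 'ı' rather than 'i', so a
    // culture-aware, case-insensitive prefix check fails where ordinal succeeds:
    Assert.IsFalse("istanbul".StartsWith("I", true, tr));
    Assert.IsTrue("istanbul".StartsWith("i", StringComparison.Ordinal));
}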
public virtual void TestFixedBinary([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func<IConcurrentMergeScheduler> newScheduler)
{
    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedBinary"));
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
    }

    var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
        .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
        .SetRAMBufferSizeMB(256.0)
        .SetMergeScheduler(newScheduler())
        .SetMergePolicy(NewLogMergePolicy(false, 10))
        .SetOpenMode(OpenMode.CREATE);
    IndexWriter w = new IndexWriter(dir, config);

    Document doc = new Document();
    var bytes = new byte[4];
    BytesRef data = new BytesRef(bytes);
    BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
    doc.Add(dvField);

    for (int i = 0; i < int.MaxValue; i++)
    {
        // Pack the counter into 4 big-endian bytes; the shared BytesRef picks it up.
        bytes[0] = (byte)(i >> 24);
        bytes[1] = (byte)(i >> 16);
        bytes[2] = (byte)(i >> 8);
        bytes[3] = (byte)i;
        w.AddDocument(doc);
        if (i % 100000 == 0)
        {
            Console.WriteLine("indexed: " + i);
            Console.Out.Flush();
        }
    }

    w.ForceMerge(1);
    w.Dispose();

    Console.WriteLine("verifying...");
    Console.Out.Flush();
    DirectoryReader r = DirectoryReader.Open(dir);
    int expectedValue = 0;
    foreach (AtomicReaderContext context in r.Leaves)
    {
        AtomicReader reader = context.AtomicReader;
        BytesRef scratch = new BytesRef();
        BinaryDocValues dv = reader.GetBinaryDocValues("dv");
        for (int i = 0; i < reader.MaxDoc; i++)
        {
            // Rebuild the expected big-endian bytes and compare with the stored value.
            bytes[0] = (byte)(expectedValue >> 24);
            bytes[1] = (byte)(expectedValue >> 16);
            bytes[2] = (byte)(expectedValue >> 8);
            bytes[3] = (byte)expectedValue;
            dv.Get(i, scratch);
            Assert.AreEqual(data, scratch);
            expectedValue++;
        }
    }

    r.Dispose();
    dir.Dispose();
}
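// The fixed-width test above packs each expected value into 4 big-endian bytes and
// rebuilds the same buffer during verification. A self-contained sketch of that
// round trip (plain C#, no Lucene types; helper names are illustrative, not part
// of the suite):
private static byte[] PackBigEndian(int v)
{
    return new[] { (byte)(v >> 24), (byte)(v >> 16), (byte)(v >> 8), (byte)v };
}

private static int UnpackBigEndian(byte[] b)
{
    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
}

// e.g. UnpackBigEndian(PackBigEndian(0x01020304)) == 0x01020304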