public virtual void TestIndexedBit()
{
    using Directory dir = NewDirectory();
    IndexReader r = null;
    try
    {
        using (RandomIndexWriter w = new RandomIndexWriter(Random, dir))
        {
            Document doc = new Document();
            FieldType onlyStored = new FieldType();
            onlyStored.IsStored = true;
            doc.Add(new Field("field", "value", onlyStored));
            doc.Add(new StringField("field2", "value", Field.Store.YES));
            w.AddDocument(doc);
            r = w.GetReader();
        } // w.Dispose();

        Assert.IsFalse(r.Document(0).GetField("field").IndexableFieldType.IsIndexed);
        Assert.IsTrue(r.Document(0).GetField("field2").IndexableFieldType.IsIndexed);
    }
    finally
    {
        r?.Dispose();
    }
}
/// <summary>
/// Dispose(bool disposing) executes in two distinct scenarios.
/// If disposing equals true, the method has been called directly
/// or indirectly by a user's code. Managed and unmanaged resources
/// can be disposed.
/// If disposing equals false, the method has been called by the
/// runtime from inside the finalizer and you should not reference
/// other objects. Only unmanaged resources can be disposed.
/// </summary>
protected virtual void Dispose(bool disposing)
{
    // Check to see if Dispose has already been called.
    if (!this._disposed)
    {
        // Note disposing has been done.
        _disposed = true;

        // If disposing equals true, dispose all managed
        // and unmanaged resources.
        if (disposing)
        {
            if (_reader != null)
            {
                _reader.Dispose();
            }
            if (_facetReader != null)
            {
                _facetReader.Dispose();
            }
        }

        // Call the appropriate methods to clean up
        // unmanaged resources here.
        _reader = null;
        _facetReader = null;
    }
}
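// NOTE: the dispose pattern above assumes the enclosing class also exposes the
// standard public Dispose() entry point (and, optionally, a finalizer). A minimal
// sketch of those companion members follows; the _reader/_facetReader fields are
// taken from the method above, and the real class may already define these elsewhere.
public void Dispose()
{
    // Dispose of managed and unmanaged resources, then suppress finalization
    // so the runtime does not invoke Dispose(false) a second time.
    Dispose(true);
    GC.SuppressFinalize(this);
}

// Hypothetical finalizer (only needed if the class directly owns unmanaged resources):
// ~EnclosingClass()
// {
//     Dispose(false);
// }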
public void TestFieldNonExistent()
{
    try
    {
        indexReader = DirectoryReader.Open(store);

        ld = new LuceneDictionary(indexReader, "nonexistent_field");
        it = ld.EntryIterator;

        assertNull("More elements than expected", spare = it.Next());
    }
    finally
    {
        if (indexReader != null)
        {
            indexReader.Dispose();
        }
    }
}
public virtual void TestCloseWithThreads([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func<IConcurrentMergeScheduler> newScheduler)
{
    int NUM_THREADS = 3;
    int numIterations = TEST_NIGHTLY ? 7 : 3;
    for (int iter = 0; iter < numIterations; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: iter=" + iter);
        }
        Directory dir = NewDirectory();
        var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
            .SetMaxBufferedDocs(10)
            .SetMergeScheduler(newScheduler())
            .SetMergePolicy(NewLogMergePolicy(4));
        IndexWriter writer = new IndexWriter(dir, config);

        var scheduler = config.MergeScheduler as IConcurrentMergeScheduler;
        if (scheduler != null)
        {
            scheduler.SetSuppressExceptions();
        }

        IndexerThread[] threads = new IndexerThread[NUM_THREADS];
        for (int i = 0; i < NUM_THREADS; i++)
        {
            threads[i] = new IndexerThread(writer, false, NewField)
            // LUCENENET NOTE - ConcurrentMergeScheduler
            // used to take too long for this test to index a single document
            // so, increased the time from 200 to 300 ms.
            // But it has now been restored to 200 ms like Lucene.
            {
                TimeToRunInMilliseconds = 200
            };
        }

        for (int i = 0; i < NUM_THREADS; i++)
        {
            threads[i].Start();
        }

        bool done = false;
        while (!done)
        {
            Thread.Sleep(100);
            for (int i = 0; i < NUM_THREADS; i++)
            {
                // only stop when at least one thread has added a doc
                if (threads[i].AddCount > 0)
                {
                    done = true;
                    break;
                }
                else if (!threads[i].IsAlive)
                {
                    Assert.Fail("thread failed before indexing a single document");
                }
            }
        }

        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: now close");
        }
        writer.Dispose(false);

        // Make sure threads that are adding docs are not hung:
        for (int i = 0; i < NUM_THREADS; i++)
        {
            // Without fix for LUCENE-1130: one of the
            // threads will hang
            threads[i].Join();
            if (threads[i].IsAlive)
            {
                Assert.Fail("thread seems to be hung");
            }
        }

        // Quick test to make sure index is not corrupt:
        IndexReader reader = DirectoryReader.Open(dir);
        DocsEnum tdocs = TestUtil.Docs(Random, reader, "field", new BytesRef("aaa"), MultiFields.GetLiveDocs(reader), null, 0);
        int count = 0;
        while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
        {
            count++;
        }
        Assert.IsTrue(count > 0);
        reader.Dispose();

        dir.Dispose();
    }
}
public void TestFieldContents_1()
{
    try
    {
        indexReader = DirectoryReader.Open(store);

        ld = new LuceneDictionary(indexReader, "contents");
        it = ld.EntryIterator;

        assertNotNull("First element doesn't exist.", spare = it.Next());
        assertTrue("First element isn't correct", spare.Utf8ToString().Equals("Jerry", StringComparison.Ordinal));
        assertNotNull("Second element doesn't exist.", spare = it.Next());
        assertTrue("Second element isn't correct", spare.Utf8ToString().Equals("Tom", StringComparison.Ordinal));
        assertNull("More elements than expected", it.Next());

        ld = new LuceneDictionary(indexReader, "contents");
        it = ld.EntryIterator;

        int counter = 2;
        while (it.Next() != null)
        {
            counter--;
        }

        assertTrue("Number of words incorrect", counter == 0);
    }
    finally
    {
        if (indexReader != null)
        {
            indexReader.Dispose();
        }
    }
}
public void TestFieldZzz()
{
    try
    {
        indexReader = DirectoryReader.Open(store);

        ld = new LuceneDictionary(indexReader, "zzz");
        it = ld.EntryIterator;

        assertNotNull("First element doesn't exist.", spare = it.Next());
        assertEquals("First element isn't correct", "bar", spare.Utf8ToString());
        assertNull("More elements than expected", it.Next());
    }
    finally
    {
        if (indexReader != null)
        {
            indexReader.Dispose();
        }
    }
}
public virtual void TestKeepAllDeletionPolicy() { for (int pass = 0; pass < 2; pass++) { if (VERBOSE) { Console.WriteLine("TEST: cycle pass="******"TEST: open writer for forceMerge"); } writer = new IndexWriter(dir, conf); policy = (KeepAllDeletionPolicy)writer.Config.IndexDeletionPolicy; writer.ForceMerge(1); writer.Dispose(); } Assert.AreEqual(needsMerging ? 2 : 1, policy.NumOnInit); // If we are not auto committing then there should // be exactly 2 commits (one per close above): Assert.AreEqual(1 + (needsMerging ? 1 : 0), policy.NumOnCommit); // Test listCommits ICollection <IndexCommit> commits = DirectoryReader.ListCommits(dir); // 2 from closing writer Assert.AreEqual(1 + (needsMerging ? 1 : 0), commits.Count); // Make sure we can open a reader on each commit: foreach (IndexCommit commit in commits) { IndexReader r = DirectoryReader.Open(commit); r.Dispose(); } // Simplistic check: just verify all segments_N's still // exist, and, I can open a reader on each: dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); long gen = SegmentInfos.GetLastCommitGeneration(dir); while (gen > 0) { IndexReader reader = DirectoryReader.Open(dir); reader.Dispose(); dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; if (gen > 0) { // Now that we've removed a commit point, which // should have orphan'd at least one index file. // Open & close a writer and assert that it // actually removed something: int preCount = dir.ListAll().Length; writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy)); writer.Dispose(); int postCount = dir.ListAll().Length; Assert.IsTrue(postCount < preCount); } } dir.Dispose(); } }
public virtual void TestKeepLastNDeletionPolicyWithCreates() { const int N = 10; for (int pass = 0; pass < 2; pass++) { bool useCompoundFile = (pass % 2) != 0; Directory dir = NewDirectory(); IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(new KeepLastNDeletionPolicy(this, N)).SetMaxBufferedDocs(10); MergePolicy mp = conf.MergePolicy; mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0; IndexWriter writer = new IndexWriter(dir, conf); KeepLastNDeletionPolicy policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; writer.Dispose(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for (int i = 0; i < N + 1; i++) { conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy).SetMaxBufferedDocs(10); mp = conf.MergePolicy; mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0; writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; for (int j = 0; j < 17; j++) { AddDocWithID(writer, i * (N + 1) + j); } // this is a commit writer.Dispose(); conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetIndexDeletionPolicy(policy).SetMergePolicy(NoMergePolicy.COMPOUND_FILES); writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; writer.DeleteDocuments(new Term("id", "" + (i * (N + 1) + 3))); // this is a commit writer.Dispose(); IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(16, hits.Length); reader.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(policy)); policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; // this will not commit: there are no changes // pending because we opened for "create": writer.Dispose(); } Assert.AreEqual(3 * (N + 1) + 1, policy.NumOnInit); Assert.AreEqual(3 * (N + 1) + 1, policy.NumOnCommit); IndexReader rwReader = DirectoryReader.Open(dir); IndexSearcher searcher_ = NewSearcher(rwReader); ScoreDoc[] hits_ = searcher_.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits_.Length); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.GetLastCommitGeneration(dir); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); int expectedCount = 0; rwReader.Dispose(); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = DirectoryReader.Open(dir); // Work backwards in commits on what the expected // count should be. 
searcher_ = NewSearcher(reader); hits_ = searcher_.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(expectedCount, hits_.Length); if (expectedCount == 0) { expectedCount = 16; } else if (expectedCount == 16) { expectedCount = 17; } else if (expectedCount == 17) { expectedCount = 0; } reader.Dispose(); if (i == N) { Assert.Fail("should have failed on commits before last " + N); } } catch (IOException /*e*/) { if (i != N) { throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Dispose(); } }
public virtual void Test2BTerms_Mem() { if ("Lucene3x".Equals(Codec.Default.Name)) { throw new Exception("this test cannot run with PreFlex codec"); } Console.WriteLine("Starting Test2B"); long TERM_COUNT = ((long)int.MaxValue) + 100000000; int TERMS_PER_DOC = TestUtil.NextInt(Random(), 100000, 1000000); IList <BytesRef> savedTerms = null; BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms")); //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } dir.CheckIndexOnClose = false; // don't double-checkindex if (true) { IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetRAMBufferSizeMB(256.0).SetMergeScheduler(new ConcurrentMergeScheduler()).SetMergePolicy(NewLogMergePolicy(false, 10)).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE)); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Documents.Document doc = new Documents.Document(); MyTokenStream ts = new MyTokenStream(Random(), TERMS_PER_DOC); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; customType.OmitNorms = true; Field field = new Field("field", ts, customType); doc.Add(field); //w.setInfoStream(System.out); int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC); Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC); Console.WriteLine("numDocs=" + numDocs); for (int i = 0; i < numDocs; i++) { long t0 = Environment.TickCount; w.AddDocument(doc); Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec"); } savedTerms = ts.SavedTerms; Console.WriteLine("TEST: full merge"); w.ForceMerge(1); Console.WriteLine("TEST: close writer"); w.Dispose(); } Console.WriteLine("TEST: open reader"); IndexReader r = DirectoryReader.Open(dir); if (savedTerms == null) { savedTerms = FindTerms(r); } int numSavedTerms = savedTerms.Count; IList <BytesRef> bigOrdTerms = new List <BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms)); Console.WriteLine("TEST: test big ord terms..."); TestSavedTerms(r, bigOrdTerms); Console.WriteLine("TEST: test all saved terms..."); TestSavedTerms(r, savedTerms); r.Dispose(); Console.WriteLine("TEST: now CheckIndex..."); CheckIndex.Status status = TestUtil.CheckIndex(dir); long tc = status.SegmentInfos[0].TermIndexStatus.TermCount; Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue); dir.Dispose(); Console.WriteLine("TEST: done!"); }
// builds an index with payloads in the given Directory and performs // different tests to verify the payload encoding private void PerformTest(Directory dir) { PayloadAnalyzer analyzer = new PayloadAnalyzer(); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy())); // should be in sync with value in TermInfosWriter const int skipInterval = 16; const int numTerms = 5; const string fieldName = "f1"; int numDocs = skipInterval + 1; // create content for the test documents with just a few terms Term[] terms = GenerateTerms(fieldName, numTerms); StringBuilder sb = new StringBuilder(); for (int i = 0; i < terms.Length; i++) { sb.Append(terms[i].Text()); sb.Append(" "); } string content = sb.ToString(); int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2; var payloadData = GenerateRandomData(payloadDataLength); Document d = new Document(); d.Add(NewTextField(fieldName, content, Field.Store.NO)); // add the same document multiple times to have the same payload lengths for all // occurrences within two consecutive skip intervals int offset = 0; for (int i = 0; i < 2 * numDocs; i++) { analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1); offset += numTerms; writer.AddDocument(d, analyzer); } // make sure we create more than one segment to test merging writer.Commit(); // now we make sure to have different payload lengths next at the next skip point for (int i = 0; i < numDocs; i++) { analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i); offset += i * numTerms; writer.AddDocument(d, analyzer); } writer.ForceMerge(1); // flush writer.Dispose(); /* * Verify the index * first we test if all payloads are stored correctly */ IndexReader reader = DirectoryReader.Open(dir); var verifyPayloadData = new byte[payloadDataLength]; offset = 0; var tps = new DocsAndPositionsEnum[numTerms]; for (int i = 0; i < numTerms; i++) { tps[i] = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[i].Field, new BytesRef(terms[i].Text())); } while (tps[0].NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { for (int i = 1; i < numTerms; i++) { tps[i].NextDoc(); } int freq = tps[0].Freq; for (int i = 0; i < freq; i++) { for (int j = 0; j < numTerms; j++) { tps[j].NextPosition(); BytesRef br = tps[j].GetPayload(); if (br != null) { Array.Copy(br.Bytes, br.Offset, verifyPayloadData, offset, br.Length); offset += br.Length; } } } } AssertByteArrayEquals(payloadData, verifyPayloadData); /* * test lazy skipping */ DocsAndPositionsEnum tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[0].Field, new BytesRef(terms[0].Text())); tp.NextDoc(); tp.NextPosition(); // NOTE: prior rev of this test was failing to first // call next here: tp.NextDoc(); // now we don't read this payload tp.NextPosition(); BytesRef payload = tp.GetPayload(); Assert.AreEqual(1, payload.Length, "Wrong payload length."); Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[numTerms]); tp.NextDoc(); tp.NextPosition(); // we don't read this payload and skip to a different document tp.Advance(5); tp.NextPosition(); payload = tp.GetPayload(); Assert.AreEqual(1, payload.Length, "Wrong payload length."); Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[5 * numTerms]); /* * Test different lengths at skip points */ tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[1].Field, new 
BytesRef(terms[1].Text())); tp.NextDoc(); tp.NextPosition(); Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length."); tp.Advance(skipInterval - 1); tp.NextPosition(); Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length."); tp.Advance(2 * skipInterval - 1); tp.NextPosition(); Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length."); tp.Advance(3 * skipInterval - 1); tp.NextPosition(); Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayload().Length, "Wrong payload length."); reader.Dispose(); // test long payload analyzer = new PayloadAnalyzer(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE)); string singleTerm = "lucene"; d = new Document(); d.Add(NewTextField(fieldName, singleTerm, Field.Store.NO)); // add a payload whose length is greater than the buffer size of BufferedIndexOutput payloadData = GenerateRandomData(2000); analyzer.SetPayloadData(fieldName, payloadData, 100, 1500); writer.AddDocument(d); writer.ForceMerge(1); // flush writer.Dispose(); reader = DirectoryReader.Open(dir); tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), fieldName, new BytesRef(singleTerm)); tp.NextDoc(); tp.NextPosition(); BytesRef bref = tp.GetPayload(); verifyPayloadData = new byte[bref.Length]; var portion = new byte[1500]; Array.Copy(payloadData, 100, portion, 0, 1500); AssertByteArrayEquals(portion, bref.Bytes, bref.Offset, bref.Length); reader.Dispose(); }
public virtual void TestUnsupportedOldIndexes() { for (int i = 0; i < UnsupportedNames.Length; i++) { if (VERBOSE) { Console.WriteLine("TEST: index " + UnsupportedNames[i]); } DirectoryInfo oldIndexDir = CreateTempDir(UnsupportedNames[i]); using (Stream dataFile = this.GetType().GetTypeInfo().Assembly.FindAndGetManifestResourceStream(GetType(), "unsupported." + UnsupportedNames[i] + ".zip")) { TestUtil.Unzip(dataFile, oldIndexDir); } BaseDirectoryWrapper dir = NewFSDirectory(oldIndexDir); // don't checkindex, these are intentionally not supported dir.CheckIndexOnClose = false; IndexReader reader = null; IndexWriter writer = null; try { reader = DirectoryReader.Open(dir); Assert.Fail("DirectoryReader.open should not pass for " + UnsupportedNames[i]); } #pragma warning disable 168 catch (IndexFormatTooOldException e) #pragma warning restore 168 { // pass } finally { if (reader != null) { reader.Dispose(); } reader = null; } try { writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); Assert.Fail("IndexWriter creation should not pass for " + UnsupportedNames[i]); } catch (IndexFormatTooOldException e) { // pass if (VERBOSE) { Console.WriteLine("TEST: got expected exc:"); Console.WriteLine(e.StackTrace); } // Make sure exc message includes a path= Assert.IsTrue(e.Message.IndexOf("path=\"") != -1, "got exc message: " + e.Message); } finally { // we should fail to open IW, and so it should be null when we get here. // However, if the test fails (i.e., IW did not fail on open), we need // to close IW. However, if merges are run, IW may throw // IndexFormatTooOldException, and we don't want to mask the Assert.Fail() // above, so close without waiting for merges. if (writer != null) { writer.Dispose(false); } writer = null; } StringBuilder bos = new StringBuilder(); CheckIndex checker = new CheckIndex(dir); checker.InfoStream = new StringWriter(bos); CheckIndex.Status indexStatus = checker.DoCheckIndex(); Assert.IsFalse(indexStatus.Clean); checker.InfoStream.Flush(); Assert.IsTrue(bos.ToString().Contains(typeof(IndexFormatTooOldException).Name)); dir.Dispose(); } }
public virtual void SearchIndex(Directory dir, string oldName) { //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random)); //Query query = parser.parse("handle:1"); IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = new IndexSearcher(reader); TestUtil.CheckIndex(dir); // true if this is a 4.0+ index bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null; IBits liveDocs = MultiFields.GetLiveDocs(reader); for (int i = 0; i < 35; i++) { if (liveDocs.Get(i)) { Document d = reader.Document(i); IList <IIndexableField> fields = d.Fields; bool isProxDoc = d.GetField("content3") == null; if (isProxDoc) { int numFields = is40Index ? 7 : 5; Assert.AreEqual(numFields, fields.Count); IIndexableField f = d.GetField("id"); Assert.AreEqual("" + i, f.GetStringValue()); f = d.GetField("utf8"); Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.GetStringValue()); f = d.GetField("autf8"); Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.GetStringValue()); f = d.GetField("content2"); Assert.AreEqual("here is more content with aaa aaa aaa", f.GetStringValue()); f = d.GetField("fie\u2C77ld"); Assert.AreEqual("field with non-ascii name", f.GetStringValue()); } Fields tfvFields = reader.GetTermVectors(i); Assert.IsNotNull(tfvFields, "i=" + i); Terms tfv = tfvFields.GetTerms("utf8"); Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName); } else { // Only ID 7 is deleted Assert.AreEqual(7, i); } } if (is40Index) { // check docvalues fields NumericDocValues dvByte = MultiDocValues.GetNumericValues(reader, "dvByte"); BinaryDocValues dvBytesDerefFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed"); BinaryDocValues dvBytesDerefVar = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar"); SortedDocValues dvBytesSortedFixed = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed"); SortedDocValues dvBytesSortedVar = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar"); BinaryDocValues dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed"); BinaryDocValues dvBytesStraightVar = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar"); NumericDocValues dvDouble = MultiDocValues.GetNumericValues(reader, "dvDouble"); NumericDocValues dvFloat = MultiDocValues.GetNumericValues(reader, "dvFloat"); NumericDocValues dvInt = MultiDocValues.GetNumericValues(reader, "dvInt"); NumericDocValues dvLong = MultiDocValues.GetNumericValues(reader, "dvLong"); NumericDocValues dvPacked = MultiDocValues.GetNumericValues(reader, "dvPacked"); NumericDocValues dvShort = MultiDocValues.GetNumericValues(reader, "dvShort"); for (int i = 0; i < 35; i++) { int id = Convert.ToInt32(reader.Document(i).Get("id")); Assert.AreEqual(id, dvByte.Get(i)); sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id }; BytesRef expectedRef = new BytesRef((byte[])(Array)bytes); BytesRef scratch = new BytesRef(); dvBytesDerefFixed.Get(i, scratch); Assert.AreEqual(expectedRef, scratch); dvBytesDerefVar.Get(i, scratch); Assert.AreEqual(expectedRef, scratch); dvBytesSortedFixed.Get(i, scratch); Assert.AreEqual(expectedRef, scratch); dvBytesSortedVar.Get(i, scratch); Assert.AreEqual(expectedRef, scratch); dvBytesStraightFixed.Get(i, scratch); Assert.AreEqual(expectedRef, scratch); dvBytesStraightVar.Get(i, scratch); Assert.AreEqual(expectedRef, scratch); Assert.AreEqual((double)id, 
BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D); Assert.AreEqual((float)id, Number.Int32BitsToSingle((int)dvFloat.Get(i)), 0F); Assert.AreEqual(id, dvInt.Get(i)); Assert.AreEqual(id, dvLong.Get(i)); Assert.AreEqual(id, dvPacked.Get(i)); Assert.AreEqual(id, dvShort.Get(i)); } } ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; // First document should be #21 since it's norm was // increased: Document d_ = searcher.IndexReader.Document(hits[0].Doc); assertEquals("didn't get the right document first", "21", d_.Get("id")); DoTestHits(hits, 34, searcher.IndexReader); if (is40Index) { hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs; DoTestHits(hits, 34, searcher.IndexReader); hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs; DoTestHits(hits, 34, searcher.IndexReader); } hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); reader.Dispose(); }
public virtual void TestPrepareCommitRollback()
{
    Directory dir = NewDirectory();
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).PreventDoubleWrite = false;
    }

    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(5)));
    writer.Commit();

    for (int i = 0; i < 23; i++)
    {
        TestIndexWriter.AddDoc(writer);
    }

    DirectoryReader reader = DirectoryReader.Open(dir);
    Assert.AreEqual(0, reader.NumDocs);

    writer.PrepareCommit();

    IndexReader reader2 = DirectoryReader.Open(dir);
    Assert.AreEqual(0, reader2.NumDocs);

    writer.Rollback();

    IndexReader reader3 = DirectoryReader.OpenIfChanged(reader);
    Assert.IsNull(reader3);
    Assert.AreEqual(0, reader.NumDocs);
    Assert.AreEqual(0, reader2.NumDocs);
    reader.Dispose();
    reader2.Dispose();

    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    for (int i = 0; i < 17; i++)
    {
        TestIndexWriter.AddDoc(writer);
    }

    reader = DirectoryReader.Open(dir);
    Assert.AreEqual(0, reader.NumDocs);
    reader.Dispose();

    writer.PrepareCommit();

    reader = DirectoryReader.Open(dir);
    Assert.AreEqual(0, reader.NumDocs);
    reader.Dispose();

    writer.Commit();

    reader = DirectoryReader.Open(dir);
    Assert.AreEqual(17, reader.NumDocs);
    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
public virtual void TestCommitOnCloseAbort() { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(10)); for (int i = 0; i < 14; i++) { TestIndexWriter.AddDoc(writer); } writer.Dispose(); Term searchTerm = new Term("content", "aaa"); IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(14, hits.Length, "first number of hits"); reader.Dispose(); writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND).SetMaxBufferedDocs(10)); for (int j = 0; j < 17; j++) { TestIndexWriter.AddDoc(writer); } // Delete all docs: writer.DeleteDocuments(searchTerm); reader = DirectoryReader.Open(dir); searcher = NewSearcher(reader); hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(14, hits.Length, "reader incorrectly sees changes from writer"); reader.Dispose(); // Now, close the writer: writer.Rollback(); TestIndexWriter.AssertNoUnreferencedFiles(dir, "unreferenced files remain after rollback()"); reader = DirectoryReader.Open(dir); searcher = NewSearcher(reader); hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(14, hits.Length, "saw changes after writer.abort"); reader.Dispose(); // Now make sure we can re-open the index, add docs, // and all is good: writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND).SetMaxBufferedDocs(10)); // On abort, writer in fact may write to the same // segments_N file: if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).PreventDoubleWrite = false; } for (int i = 0; i < 12; i++) { for (int j = 0; j < 17; j++) { TestIndexWriter.AddDoc(writer); } IndexReader r = DirectoryReader.Open(dir); searcher = NewSearcher(r); hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(14, hits.Length, "reader incorrectly sees changes from writer"); r.Dispose(); } writer.Dispose(); IndexReader ir = DirectoryReader.Open(dir); searcher = NewSearcher(ir); hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(218, hits.Length, "didn't see changes after close"); ir.Dispose(); dir.Dispose(); }
/* * Run one indexer and 2 searchers against single index as * stress test. */ public virtual void RunTest(Directory directory) { TimedThread[] threads = new TimedThread[4]; IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(7); ((TieredMergePolicy)conf.MergePolicy).MaxMergeAtOnce = 3; IndexWriter writer = RandomIndexWriter.MockIndexWriter(directory, conf, Random()); // Establish a base index of 100 docs: for (int i = 0; i < 100; i++) { Documents.Document d = new Documents.Document(); d.Add(NewStringField("id", Convert.ToString(i), Field.Store.YES)); d.Add(NewTextField("contents", English.IntToEnglish(i), Field.Store.NO)); if ((i - 1) % 7 == 0) { writer.Commit(); } writer.AddDocument(d); } writer.Commit(); IndexReader r = DirectoryReader.Open(directory); Assert.AreEqual(100, r.NumDocs); r.Dispose(); IndexerThread indexerThread = new IndexerThread(writer, threads); threads[0] = indexerThread; indexerThread.Start(); IndexerThread indexerThread2 = new IndexerThread(writer, threads); threads[1] = indexerThread2; indexerThread2.Start(); SearcherThread searcherThread1 = new SearcherThread(directory, threads); threads[2] = searcherThread1; searcherThread1.Start(); SearcherThread searcherThread2 = new SearcherThread(directory, threads); threads[3] = searcherThread2; searcherThread2.Start(); indexerThread.Join(); indexerThread2.Join(); searcherThread1.Join(); searcherThread2.Join(); writer.Dispose(); Assert.IsTrue(!indexerThread.Failed, "hit unexpected exception in indexer"); Assert.IsTrue(!indexerThread2.Failed, "hit unexpected exception in indexer2"); Assert.IsTrue(!searcherThread1.Failed, "hit unexpected exception in search1"); Assert.IsTrue(!searcherThread2.Failed, "hit unexpected exception in search2"); //System.out.println(" Writer: " + indexerThread.count + " iterations"); //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created"); //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created"); }
public virtual void TestTermUTF16SortOrder() { Random rnd = Random; Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif rnd, dir); Document d = new Document(); // Single segment Field f = NewStringField("f", "", Field.Store.NO); d.Add(f); char[] chars = new char[2]; ISet <string> allTerms = new JCG.HashSet <string>(); int num = AtLeast(200); for (int i = 0; i < num; i++) { string s; if (rnd.NextBoolean()) { // Single char if (rnd.NextBoolean()) { // Above surrogates chars[0] = (char)GetInt(rnd, 1 + UnicodeUtil.UNI_SUR_LOW_END, 0xffff); } else { // Below surrogates chars[0] = (char)GetInt(rnd, 0, UnicodeUtil.UNI_SUR_HIGH_START - 1); } s = new string(chars, 0, 1); } else { // Surrogate pair chars[0] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_HIGH_START, UnicodeUtil.UNI_SUR_HIGH_END); Assert.IsTrue(((int)chars[0]) >= UnicodeUtil.UNI_SUR_HIGH_START && ((int)chars[0]) <= UnicodeUtil.UNI_SUR_HIGH_END); chars[1] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_LOW_START, UnicodeUtil.UNI_SUR_LOW_END); s = new string(chars, 0, 2); } allTerms.Add(s); f.SetStringValue(s); writer.AddDocument(d); if ((1 + i) % 42 == 0) { writer.Commit(); } } IndexReader r = writer.GetReader(); // Test each sub-segment foreach (AtomicReaderContext ctx in r.Leaves) { CheckTermsOrder(ctx.Reader, allTerms, false); } CheckTermsOrder(r, allTerms, true); // Test multi segment r.Dispose(); writer.ForceMerge(1); // Test single segment r = writer.GetReader(); CheckTermsOrder(r, allTerms, true); r.Dispose(); writer.Dispose(); dir.Dispose(); }
public virtual void DoTestNumbers(bool withPayloads) { Directory dir = NewDirectory(); Analyzer analyzer = withPayloads ? (Analyzer) new MockPayloadAnalyzer() : new MockAnalyzer(Random); iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc); FieldType ft = new FieldType(TextField.TYPE_STORED); ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (Random.NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = Random.NextBoolean(); ft.StoreTermVectorPositions = Random.NextBoolean(); } int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(new Field("numbers", English.Int32ToEnglish(i), ft)); doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft)); doc.Add(new StringField("id", "" + i, Field.Store.NO)); w.AddDocument(doc); } IndexReader reader = w.GetReader(); w.Dispose(); string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" }; foreach (string term in terms) { DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term)); int doc; while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { string storedNumbers = reader.Document(doc).Get("numbers"); int freq = dp.Freq; for (int i = 0; i < freq; i++) { dp.NextPosition(); int start = dp.StartOffset; if (Debugging.AssertsEnabled) { Debugging.Assert(start >= 0); } int end = dp.EndOffset; if (Debugging.AssertsEnabled) { Debugging.Assert(end >= 0 && end >= start); } // check that the offsets correspond to the term in the src text Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term, StringComparison.Ordinal)); if (withPayloads) { // check that we have a payload and it starts with "pos" Assert.IsNotNull(dp.GetPayload()); BytesRef payload = dp.GetPayload(); Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal)); } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer! } } } // check we can skip correctly int numSkippingTests = AtLeast(50); for (int j = 0; j < numSkippingTests; j++) { int num = TestUtil.NextInt32(Random, 100, Math.Min(numDocs - 1, 999)); DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred")); int doc = dp.Advance(num); Assert.AreEqual(num, doc); int freq = dp.Freq; for (int i = 0; i < freq; i++) { string storedNumbers = reader.Document(doc).Get("numbers"); dp.NextPosition(); int start = dp.StartOffset; if (Debugging.AssertsEnabled) { Debugging.Assert(start >= 0); } int end = dp.EndOffset; if (Debugging.AssertsEnabled) { Debugging.Assert(end >= 0 && end >= start); } // check that the offsets correspond to the term in the src text Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred", StringComparison.Ordinal)); if (withPayloads) { // check that we have a payload and it starts with "pos" Assert.IsNotNull(dp.GetPayload()); BytesRef payload = dp.GetPayload(); Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal)); } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer! 
} } // check that other fields (without offsets) work correctly for (int i = 0; i < numDocs; i++) { DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0); Assert.AreEqual(i, dp.NextDoc()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); } reader.Dispose(); dir.Dispose(); }
public override void TearDown()
{
    Reader.Dispose();
    Dir.Dispose();
    base.TearDown();
}
public virtual void Test() { int NUM_DOCS = AtLeast(1000); Directory dir = NewDirectory(); RandomIndexWriter w = null; int docsLeftInthisSegment = 0; int docUpto = 0; while (docUpto < NUM_DOCS) { if (VERBOSE) { Console.WriteLine("TEST: " + docUpto + " of " + NUM_DOCS); } if (docsLeftInthisSegment == 0) { IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); if (Random().NextBoolean()) { // Make sure we aggressively mix in SimpleText // since it has different impls for all codec // formats... iwc.SetCodec(Codec.ForName("Lucene46")); } if (w != null) { w.Dispose(); } w = new RandomIndexWriter(Random(), dir, iwc); docsLeftInthisSegment = TestUtil.NextInt(Random(), 10, 100); } Document doc = new Document(); doc.Add(NewStringField("id", Convert.ToString(docUpto), Field.Store.YES)); w.AddDocument(doc); docUpto++; docsLeftInthisSegment--; } if (VERBOSE) { Console.WriteLine("\nTEST: now delete..."); } // Random delete half the docs: HashSet <int?> deleted = new HashSet <int?>(); while (deleted.Count < NUM_DOCS / 2) { int?toDelete = Random().Next(NUM_DOCS); if (!deleted.Contains(toDelete)) { deleted.Add(toDelete); w.DeleteDocuments(new Term("id", Convert.ToString(toDelete))); if (Random().Next(17) == 6) { IndexReader r = w.Reader; Assert.AreEqual(NUM_DOCS - deleted.Count, r.NumDocs); r.Dispose(); } } } w.Dispose(); dir.Dispose(); }
public virtual void TestExpirationTimeDeletionPolicy() { const double SECONDS = 2.0; Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(new ExpirationTimeDeletionPolicy(this, dir, SECONDS)); MergePolicy mp = conf.MergePolicy; mp.NoCFSRatio = 1.0; IndexWriter writer = new IndexWriter(dir, conf); ExpirationTimeDeletionPolicy policy = (ExpirationTimeDeletionPolicy)writer.Config.IndexDeletionPolicy; IDictionary <string, string> commitData = new Dictionary <string, string>(); commitData["commitTime"] = Convert.ToString(Environment.TickCount); writer.SetCommitData(commitData); writer.Commit(); writer.Dispose(); long lastDeleteTime = 0; int targetNumDelete = TestUtil.NextInt32(Random, 1, 5); while (policy.NumDelete < targetNumDelete) { // Record last time when writer performed deletes of // past commits lastDeleteTime = Environment.TickCount; conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy); mp = conf.MergePolicy; mp.NoCFSRatio = 1.0; writer = new IndexWriter(dir, conf); policy = (ExpirationTimeDeletionPolicy)writer.Config.IndexDeletionPolicy; for (int j = 0; j < 17; j++) { AddDoc(writer); } commitData = new Dictionary <string, string>(); commitData["commitTime"] = Convert.ToString(Environment.TickCount); writer.SetCommitData(commitData); writer.Commit(); writer.Dispose(); Thread.Sleep((int)(1000.0 * (SECONDS / 5.0))); } // Then simplistic check: just verify that the // segments_N's that still exist are in fact within SECONDS // seconds of the last one's mod time, and, that I can // open a reader on each: long gen = SegmentInfos.GetLastCommitGeneration(dir); string fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); bool oneSecondResolution = true; while (gen > 0) { try { IndexReader reader = DirectoryReader.Open(dir); reader.Dispose(); fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); // if we are on a filesystem that seems to have only // 1 second resolution, allow +1 second in commit // age tolerance: SegmentInfos sis = new SegmentInfos(); sis.Read(dir, fileName); long modTime = Convert.ToInt64(sis.UserData["commitTime"]); oneSecondResolution &= (modTime % 1000) == 0; long leeway = (long)((SECONDS + (oneSecondResolution ? 1.0 : 0.0)) * 1000); Assert.IsTrue(lastDeleteTime - modTime <= leeway, "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted "); } #pragma warning disable 168 catch (IOException e) #pragma warning restore 168 { // OK break; } dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; } dir.Dispose(); }
public virtual void TestDeletes1() { //IndexWriter.debug2 = System.out; Directory dir = new MockDirectoryWrapper(new Random(Random().Next()), new RAMDirectory()); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetMergeScheduler(new SerialMergeScheduler()); iwc.SetMaxBufferedDocs(5000); iwc.SetRAMBufferSizeMB(100); RangeMergePolicy fsmp = new RangeMergePolicy(this, false); iwc.SetMergePolicy(fsmp); IndexWriter writer = new IndexWriter(dir, iwc); for (int x = 0; x < 5; x++) { writer.AddDocument(DocHelper.CreateDocument(x, "1", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } //System.out.println("commit1"); writer.Commit(); Assert.AreEqual(1, writer.SegmentCount); for (int x = 5; x < 10; x++) { writer.AddDocument(DocHelper.CreateDocument(x, "2", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } //System.out.println("commit2"); writer.Commit(); Assert.AreEqual(2, writer.SegmentCount); for (int x = 10; x < 15; x++) { writer.AddDocument(DocHelper.CreateDocument(x, "3", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } writer.DeleteDocuments(new Term("id", "1")); writer.DeleteDocuments(new Term("id", "11")); // flushing without applying deletes means // there will still be deletes in the segment infos writer.Flush(false, false); Assert.IsTrue(writer.bufferedUpdatesStream.Any()); // get reader flushes pending deletes // so there should not be anymore IndexReader r1 = writer.GetReader(); Assert.IsFalse(writer.bufferedUpdatesStream.Any()); r1.Dispose(); // delete id:2 from the first segment // merge segments 0 and 1 // which should apply the delete id:2 writer.DeleteDocuments(new Term("id", "2")); writer.Flush(false, false); fsmp = (RangeMergePolicy)writer.Config.MergePolicy; fsmp.DoMerge = true; fsmp.Start = 0; fsmp.Length = 2; writer.MaybeMerge(); Assert.AreEqual(2, writer.SegmentCount); // id:2 shouldn't exist anymore because // it's been applied in the merge and now it's gone IndexReader r2 = writer.GetReader(); int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2); Assert.IsTrue(id2docs == null); r2.Dispose(); /* * /// // added docs are in the ram buffer * /// for (int x = 15; x < 20; x++) { * /// writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2)); * /// System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); * /// } * /// Assert.IsTrue(writer.numRamDocs() > 0); * /// // delete from the ram buffer * /// writer.DeleteDocuments(new Term("id", Integer.toString(13))); * /// * /// Term id3 = new Term("id", Integer.toString(3)); * /// * /// // delete from the 1st segment * /// writer.DeleteDocuments(id3); * /// * /// Assert.IsTrue(writer.numRamDocs() > 0); * /// * /// //System.out * /// // .println("segdels1:" + writer.docWriter.deletesToString()); * /// * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0); * /// * /// // we cause a merge to happen * /// fsmp.doMerge = true; * /// fsmp.start = 0; * /// fsmp.Length = 2; * /// System.out.println("maybeMerge "+writer.SegmentInfos); * /// * /// SegmentInfo info0 = writer.SegmentInfos.Info(0); * /// SegmentInfo info1 = writer.SegmentInfos.Info(1); * /// * /// writer.MaybeMerge(); * /// System.out.println("maybeMerge after "+writer.SegmentInfos); * /// // there should be docs in RAM * /// Assert.IsTrue(writer.numRamDocs() > 0); * /// * /// // assert we've merged the 1 and 2 segments * /// // and still have a segment leftover == 2 * /// Assert.AreEqual(2, 
writer.SegmentInfos.Size()); * /// Assert.IsFalse(segThere(info0, writer.SegmentInfos)); * /// Assert.IsFalse(segThere(info1, writer.SegmentInfos)); * /// * /// //System.out.println("segdels2:" + writer.docWriter.deletesToString()); * /// * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0); * /// * /// IndexReader r = writer.GetReader(); * /// IndexReader r1 = r.getSequentialSubReaders()[0]; * /// printDelDocs(r1.GetLiveDocs()); * /// int[] docs = toDocsArray(id3, null, r); * /// System.out.println("id3 docs:"+Arrays.toString(docs)); * /// // there shouldn't be any docs for id:3 * /// Assert.IsTrue(docs == null); * /// r.Dispose(); * /// * /// part2(writer, fsmp); * /// */ // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString()); //System.out.println("close"); writer.Dispose(); dir.Dispose(); }
public virtual void TestKeepLastNDeletionPolicy()
{
    const int N = 5;

    for (int pass = 0; pass < 2; pass++)
    {
        bool useCompoundFile = (pass % 2) != 0;

        Directory dir = NewDirectory();

        KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);
        for (int j = 0; j < N + 1; j++)
        {
            IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(policy).SetMaxBufferedDocs(10);
            MergePolicy mp = conf.MergePolicy;
            mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
            IndexWriter writer = new IndexWriter(dir, conf);
            policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy;
            for (int i = 0; i < 17; i++)
            {
                AddDoc(writer);
            }
            writer.ForceMerge(1);
            writer.Dispose();
        }

        Assert.IsTrue(policy.NumDelete > 0);
        Assert.AreEqual(N + 1, policy.NumOnInit);
        Assert.AreEqual(N + 1, policy.NumOnCommit);

        // Simplistic check: just verify only the past N segments_N's still
        // exist, and, I can open a reader on each:
        dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
        long gen = SegmentInfos.GetLastCommitGeneration(dir);
        for (int i = 0; i < N + 1; i++)
        {
            try
            {
                IndexReader reader = DirectoryReader.Open(dir);
                reader.Dispose();
                if (i == N)
                {
                    Assert.Fail("should have failed on commits prior to last " + N);
                }
            }
            catch (IOException /*e*/)
            {
                if (i != N)
                {
                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }
            }
            if (i < N)
            {
                dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
            }
            gen--;
        }

        dir.Dispose();
    }
}
public virtual void TestAddIndexOnDiskFull() { // MemoryCodec, since it uses FST, is not necessarily // "additive", ie if you add up N small FSTs, then merge // them, the merged result can easily be larger than the // sum because the merged FST may use array encoding for // some arcs (which uses more space): string idFormat = TestUtil.GetPostingsFormat("id"); string contentFormat = TestUtil.GetPostingsFormat("content"); AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory", StringComparison.Ordinal) || contentFormat.Equals("Memory", StringComparison.Ordinal)); int START_COUNT = 57; int NUM_DIR = TestNightly ? 50 : 5; int END_COUNT = START_COUNT + NUM_DIR * (TestNightly ? 25 : 5); // Build up a bunch of dirs that have indexes which we // will then merge together by calling addIndexes(*): Directory[] dirs = new Directory[NUM_DIR]; long inputDiskUsage = 0; for (int i = 0; i < NUM_DIR; i++) { dirs[i] = NewDirectory(); IndexWriter writer = new IndexWriter(dirs[i], NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); for (int j = 0; j < 25; j++) { AddDocWithIndex(writer, 25 * i + j); } writer.Dispose(); string[] files = dirs[i].ListAll(); for (int j = 0; j < files.Length; j++) { inputDiskUsage += dirs[i].FileLength(files[j]); } } // Now, build a starting index that has START_COUNT docs. We // will then try to addIndexes into a copy of this: MockDirectoryWrapper startDir = NewMockDirectory(); IndexWriter indWriter = new IndexWriter(startDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); for (int j = 0; j < START_COUNT; j++) { AddDocWithIndex(indWriter, j); } indWriter.Dispose(); // Make sure starting index seems to be working properly: Term searchTerm = new Term("content", "aaa"); IndexReader reader = DirectoryReader.Open(startDir); Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq"); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(57, hits.Length, "first number of hits"); reader.Dispose(); // Iterate with larger and larger amounts of free // disk space. With little free disk space, // addIndexes will certainly run out of space & // fail. Verify that when this happens, index is // not corrupt and index in fact has added no // documents. Then, we increase disk space by 2000 // bytes each iteration. At some point there is // enough free disk space and addIndexes should // succeed and index should show all documents were // added. 
// String[] files = startDir.ListAll(); long diskUsage = startDir.GetSizeInBytes(); long startDiskUsage = 0; string[] files_ = startDir.ListAll(); for (int i = 0; i < files_.Length; i++) { startDiskUsage += startDir.FileLength(files_[i]); } for (int iter = 0; iter < 3; iter++) { if (Verbose) { Console.WriteLine("TEST: iter=" + iter); } // Start with 100 bytes more than we are currently using: long diskFree = diskUsage + TestUtil.NextInt32(Random, 50, 200); int method = iter; bool success = false; bool done = false; string methodName; if (0 == method) { methodName = "addIndexes(Directory[]) + forceMerge(1)"; } else if (1 == method) { methodName = "addIndexes(IndexReader[])"; } else { methodName = "addIndexes(Directory[])"; } while (!done) { if (Verbose) { Console.WriteLine("TEST: cycle..."); } // Make a new dir that will enforce disk usage: MockDirectoryWrapper dir = new MockDirectoryWrapper(Random, new RAMDirectory(startDir, NewIOContext(Random))); indWriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetMergePolicy(NewLogMergePolicy(false))); IOException err = null; IMergeScheduler ms = indWriter.Config.MergeScheduler; for (int x = 0; x < 2; x++) { if (ms is IConcurrentMergeScheduler) // this test intentionally produces exceptions // in the threads that CMS launches; we don't // want to pollute test output with these. { if (0 == x) { ((IConcurrentMergeScheduler)ms).SetSuppressExceptions(); } else { ((IConcurrentMergeScheduler)ms).ClearSuppressExceptions(); } } // Two loops: first time, limit disk space & // throw random IOExceptions; second time, no // disk space limit: double rate = 0.05; double diskRatio = ((double)diskFree) / diskUsage; long thisDiskFree; string testName = null; if (0 == x) { dir.RandomIOExceptionRateOnOpen = Random.NextDouble() * 0.01; thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (Verbose) { testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes"; } } else { dir.RandomIOExceptionRateOnOpen = 0.0; thisDiskFree = 0; rate = 0.0; if (Verbose) { testName = "disk full test " + methodName + " with unlimited disk space"; } } if (Verbose) { Console.WriteLine("\ncycle: " + testName); } dir.TrackDiskUsage = true; dir.MaxSizeInBytes = thisDiskFree; dir.RandomIOExceptionRate = rate; try { if (0 == method) { if (Verbose) { Console.WriteLine("TEST: now addIndexes count=" + dirs.Length); } indWriter.AddIndexes(dirs); if (Verbose) { Console.WriteLine("TEST: now forceMerge"); } indWriter.ForceMerge(1); } else if (1 == method) { IndexReader[] readers = new IndexReader[dirs.Length]; for (int i = 0; i < dirs.Length; i++) { readers[i] = DirectoryReader.Open(dirs[i]); } try { indWriter.AddIndexes(readers); } finally { for (int i = 0; i < dirs.Length; i++) { readers[i].Dispose(); } } } else { indWriter.AddIndexes(dirs); } success = true; if (Verbose) { Console.WriteLine(" success!"); } if (0 == x) { done = true; } } catch (IOException e) { success = false; err = e; if (Verbose) { Console.WriteLine(" hit IOException: " + e); Console.WriteLine(e.StackTrace); } if (1 == x) { Console.WriteLine(e.StackTrace); Assert.Fail(methodName + " hit IOException after disk space was freed up"); } } // Make sure all threads from // ConcurrentMergeScheduler are done TestUtil.SyncConcurrentMerges(indWriter); if (Verbose) { Console.WriteLine(" now test readers"); } // Finally, verify index is not corrupt, and, 
if // we succeeded, we see all docs added, and if we // failed, we see either all docs or no docs added // (transactional semantics): dir.RandomIOExceptionRateOnOpen = 0.0; try { reader = DirectoryReader.Open(dir); } catch (IOException e) { Console.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when creating IndexReader: " + e); } int result = reader.DocFreq(searchTerm); if (success) { if (result != START_COUNT) { Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT); } } else { // On hitting exception we still may have added // all docs: if (result != START_COUNT && result != END_COUNT) { Console.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT); } } searcher = NewSearcher(reader); try { hits = searcher.Search(new TermQuery(searchTerm), null, END_COUNT).ScoreDocs; } catch (IOException e) { Console.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when searching: " + e); } int result2 = hits.Length; if (success) { if (result2 != result) { Assert.Fail(testName + ": method did not throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } else { // On hitting exception we still may have added // all docs: if (result2 != result) { Console.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } reader.Dispose(); if (Verbose) { Console.WriteLine(" count is " + result); } if (done || result == END_COUNT) { break; } } if (Verbose) { Console.WriteLine(" start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.MaxUsedSizeInBytes); } if (done) { // Javadocs state that temp free Directory space // required is at most 2X total input size of // indices so let's make sure: Assert.IsTrue((dir.MaxUsedSizeInBytes - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage), "max free Directory space required exceeded 1X the total input index sizes during " + methodName + ": max temp usage = " + (dir.MaxUsedSizeInBytes - startDiskUsage) + " bytes vs limit=" + (2 * (startDiskUsage + inputDiskUsage)) + "; starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes"); } // Make sure we don't hit disk full during close below: dir.MaxSizeInBytes = 0; dir.RandomIOExceptionRate = 0.0; dir.RandomIOExceptionRateOnOpen = 0.0; indWriter.Dispose(); // Wait for all BG threads to finish else // dir.Dispose() will throw IOException because // there are still open files TestUtil.SyncConcurrentMerges(ms); dir.Dispose(); // Try again with more free space: diskFree += TestNightly ? TestUtil.NextInt32(Random, 4000, 8000) : TestUtil.NextInt32(Random, 40000, 80000); } } startDir.Dispose(); foreach (Directory dir in dirs) { dir.Dispose(); } }
public virtual void TestRandomStoredFields() { Directory dir = NewDirectory(); Random rand = Random(); RandomIndexWriter w = new RandomIndexWriter(rand, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(rand, 5, 20))); //w.w.setNoCFSRatio(0.0); int docCount = AtLeast(200); int fieldCount = TestUtil.NextInt(rand, 1, 5); IList <int?> fieldIDs = new List <int?>(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.Tokenized = false; Field idField = NewField("id", "", customType); for (int i = 0; i < fieldCount; i++) { fieldIDs.Add(i); } IDictionary <string, Document> docs = new Dictionary <string, Document>(); if (VERBOSE) { Console.WriteLine("TEST: build index docCount=" + docCount); } FieldType customType2 = new FieldType(); customType2.Stored = true; for (int i = 0; i < docCount; i++) { Document doc = new Document(); doc.Add(idField); string id = "" + i; idField.StringValue = id; docs[id] = doc; if (VERBOSE) { Console.WriteLine("TEST: add doc id=" + id); } foreach (int field in fieldIDs) { string s; if (rand.Next(4) != 3) { s = TestUtil.RandomUnicodeString(rand, 1000); doc.Add(NewField("f" + field, s, customType2)); } else { s = null; } } w.AddDocument(doc); if (rand.Next(50) == 17) { // mixup binding of field name -> Number every so often fieldIDs = CollectionsHelper.Shuffle(fieldIDs); } if (rand.Next(5) == 3 && i > 0) { string delID = "" + rand.Next(i); if (VERBOSE) { Console.WriteLine("TEST: delete doc id=" + delID); } w.DeleteDocuments(new Term("id", delID)); docs.Remove(delID); } } if (VERBOSE) { Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields"); } if (docs.Count > 0) { string[] idsList = docs.Keys.ToArray(/*new string[docs.Count]*/); for (int x = 0; x < 2; x++) { IndexReader r = w.Reader; IndexSearcher s = NewSearcher(r); if (VERBOSE) { Console.WriteLine("TEST: cycle x=" + x + " r=" + r); } int num = AtLeast(1000); for (int iter = 0; iter < num; iter++) { string testID = idsList[rand.Next(idsList.Length)]; if (VERBOSE) { Console.WriteLine("TEST: test id=" + testID); } TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1); Assert.AreEqual(1, hits.TotalHits); Document doc = r.Document(hits.ScoreDocs[0].Doc); Document docExp = docs[testID]; for (int i = 0; i < fieldCount; i++) { Assert.AreEqual("doc " + testID + ", field f" + fieldCount + " is wrong", docExp.Get("f" + i), doc.Get("f" + i)); } } r.Dispose(); w.ForceMerge(1); } } w.Dispose(); dir.Dispose(); }
public virtual void TestRandom() { int num = AtLeast(2); for (int iter = 0; iter < num; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } Directory dir = NewDirectory(); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); // we can do this because we use NoMergePolicy (and dont merge to "nothing") w.KeepFullyDeletedSegments = true; IDictionary <BytesRef, IList <int?> > docs = new Dictionary <BytesRef, IList <int?> >(); HashSet <int?> deleted = new HashSet <int?>(); IList <BytesRef> terms = new List <BytesRef>(); int numDocs = TestUtil.NextInt(Random(), 1, 100 * RANDOM_MULTIPLIER); Documents.Document doc = new Documents.Document(); Field f = NewStringField("field", "", Field.Store.NO); doc.Add(f); Field id = NewStringField("id", "", Field.Store.NO); doc.Add(id); bool onlyUniqueTerms = Random().NextBoolean(); if (VERBOSE) { Console.WriteLine("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs); } HashSet <BytesRef> uniqueTerms = new HashSet <BytesRef>(); for (int i = 0; i < numDocs; i++) { if (!onlyUniqueTerms && Random().NextBoolean() && terms.Count > 0) { // re-use existing term BytesRef term = terms[Random().Next(terms.Count)]; docs[term].Add(i); f.SetStringValue(term.Utf8ToString()); } else { string s = TestUtil.RandomUnicodeString(Random(), 10); BytesRef term = new BytesRef(s); if (!docs.ContainsKey(term)) { docs[term] = new List <int?>(); } docs[term].Add(i); terms.Add(term); uniqueTerms.Add(term); f.SetStringValue(s); } id.SetStringValue("" + i); w.AddDocument(doc); if (Random().Next(4) == 1) { w.Commit(); } if (i > 0 && Random().Next(20) == 1) { int delID = Random().Next(i); deleted.Add(delID); w.DeleteDocuments(new Term("id", "" + delID)); if (VERBOSE) { Console.WriteLine("TEST: delete " + delID); } } } if (VERBOSE) { List <BytesRef> termsList = new List <BytesRef>(uniqueTerms); #pragma warning disable 612, 618 termsList.Sort(BytesRef.UTF8SortedAsUTF16Comparer); #pragma warning restore 612, 618 Console.WriteLine("TEST: terms in UTF16 order:"); foreach (BytesRef b in termsList) { Console.WriteLine(" " + UnicodeUtil.ToHexString(b.Utf8ToString()) + " " + b); foreach (int docID in docs[b]) { if (deleted.Contains(docID)) { Console.WriteLine(" " + docID + " (deleted)"); } else { Console.WriteLine(" " + docID); } } } } IndexReader reader = w.GetReader(); w.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: reader=" + reader); } IBits liveDocs = MultiFields.GetLiveDocs(reader); foreach (int delDoc in deleted) { Assert.IsFalse(liveDocs.Get(delDoc)); } for (int i = 0; i < 100; i++) { BytesRef term = terms[Random().Next(terms.Count)]; if (VERBOSE) { Console.WriteLine("TEST: seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " " + term); } DocsEnum docsEnum = TestUtil.Docs(Random(), reader, "field", term, liveDocs, null, DocsFlags.NONE); Assert.IsNotNull(docsEnum); foreach (int docID in docs[term]) { if (!deleted.Contains(docID)) { Assert.AreEqual(docID, docsEnum.NextDoc()); } } Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc()); } reader.Dispose(); dir.Dispose(); } }
public void TestFieldContents_2() { try { indexReader = DirectoryReader.Open(store); ld = new LuceneDictionary(indexReader, "contents"); it = ld.EntryIterator; // just iterate through words assertEquals("First element isn't correct", "Jerry", it.Next().Utf8ToString()); assertEquals("Second element isn't correct", "Tom", it.Next().Utf8ToString()); assertNull("Nonexistent element is really null", it.Next()); } finally { if (indexReader != null) { indexReader.Dispose(); } } }
public virtual void TestBasic() { Directory dir = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(2).SetSimilarity(new SimpleSimilarity()).SetMergePolicy(NewLogMergePolicy(2))); StringBuilder sb = new StringBuilder(265); string term = "term"; for (int i = 0; i < 30; i++) { Document doc = new Document(); sb.Append(term).Append(" "); string content = sb.ToString(); Field noTf = NewField("noTf", content + (i % 2 == 0 ? "" : " notf"), omitType); doc.Add(noTf); Field tf = NewField("tf", content + (i % 2 == 0 ? " tf" : ""), normalType); doc.Add(tf); writer.AddDocument(doc); //System.out.println(d); } writer.ForceMerge(1); // flush writer.Dispose(); /* * Verify the index */ IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = NewSearcher(reader); searcher.Similarity = new SimpleSimilarity(); Term a = new Term("noTf", term); Term b = new Term("tf", term); Term c = new Term("noTf", "notf"); Term d = new Term("tf", "tf"); TermQuery q1 = new TermQuery(a); TermQuery q2 = new TermQuery(b); TermQuery q3 = new TermQuery(c); TermQuery q4 = new TermQuery(d); PhraseQuery pq = new PhraseQuery(); pq.Add(a); pq.Add(c); try { searcher.Search(pq, 10); Assert.Fail("did not hit expected exception"); } catch (Exception e) { Exception cause = e; // If the searcher uses an executor service, the IAE is wrapped into other exceptions while (cause.InnerException != null) { cause = cause.InnerException; } if (!(cause is InvalidOperationException)) { throw new InvalidOperationException("Expected an IAE", e); } // else OK because positions are not indexed } searcher.Search(q1, new CountingHitCollectorAnonymousInnerClassHelper(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q2, new CountingHitCollectorAnonymousInnerClassHelper2(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q3, new CountingHitCollectorAnonymousInnerClassHelper3(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q4, new CountingHitCollectorAnonymousInnerClassHelper4(this)); //System.out.println(CountingHitCollector.getCount()); BooleanQuery bq = new BooleanQuery(); bq.Add(q1, Occur.MUST); bq.Add(q4, Occur.MUST); searcher.Search(bq, new CountingHitCollectorAnonymousInnerClassHelper5(this)); Assert.AreEqual(15, CountingHitCollector.Count); reader.Dispose(); dir.Dispose(); }
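The omitType and normalType field types referenced in the test above are defined outside this snippet. A minimal sketch of what they are assumed to look like (Lucene.NET 4.8 API), with omitType dropping term frequencies and positions so the PhraseQuery above fails as expected:

// Assumed definitions; only IndexOptions differs between the two types.
FieldType normalType = new FieldType(TextField.TYPE_NOT_STORED);   // full postings: docs, freqs, positions

FieldType omitType = new FieldType(TextField.TYPE_NOT_STORED)
{
    IndexOptions = IndexOptions.DOCS_ONLY   // docs only: no term frequencies or positions
};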
public void TestSpellchecker() { Directory dir = NewDirectory(); SpellChecker sc = new SpellChecker(dir); indexReader = DirectoryReader.Open(store); sc.IndexDictionary(new LuceneDictionary(indexReader, "contents"), NewIndexWriterConfig(TEST_VERSION_CURRENT, null), false); string[] suggestions = sc.SuggestSimilar("Tam", 1); assertEquals(1, suggestions.Length); assertEquals("Tom", suggestions[0]); suggestions = sc.SuggestSimilar("Jarry", 1); assertEquals(1, suggestions.Length); assertEquals("Jerry", suggestions[0]); indexReader.Dispose(); sc.Dispose(); dir.Dispose(); }
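The store directory, indexReader, ld, and it fields used by the dictionary and spell-checker tests come from a fixture outside this snippet. A hypothetical setup, inferred only from the assertions (a "contents" field whose terms include "Tom" and "Jerry"), might look like this:

// Hypothetical fixture; the field name "contents" and the terms "Tom"/"Jerry"
// come from the assertions above, everything else is an assumption.
// The analyzer keeps original case so the dictionary yields "Jerry" and "Tom"
// rather than lowercased terms.
store = NewDirectory();
using (IndexWriter writer = new IndexWriter(store,
    NewIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false))))
{
    foreach (string name in new[] { "Tom", "Jerry" })
    {
        Document doc = new Document();
        doc.Add(new TextField("contents", name, Field.Store.YES));
        writer.AddDocument(doc);
    }
    writer.Commit();
}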
public virtual void TestForceMergeDeletes2() { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetMergePolicy(NewLogMergePolicy(50))); Document document = new Document(); FieldType customType = new FieldType(); customType.IsStored = true; FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED); customType1.IsTokenized = false; customType1.StoreTermVectors = true; customType1.StoreTermVectorPositions = true; customType1.StoreTermVectorOffsets = true; Field storedField = NewField("stored", "stored", customType); document.Add(storedField); Field termVectorField = NewField("termVector", "termVector", customType1); document.Add(termVectorField); Field idField = NewStringField("id", "", Field.Store.NO); document.Add(idField); for (int i = 0; i < 98; i++) { idField.SetStringValue("" + i); writer.AddDocument(document); } writer.Dispose(); IndexReader ir = DirectoryReader.Open(dir); Assert.AreEqual(98, ir.MaxDoc); Assert.AreEqual(98, ir.NumDocs); ir.Dispose(); IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES); writer = new IndexWriter(dir, dontMergeConfig); for (int i = 0; i < 98; i += 2) { writer.DeleteDocuments(new Term("id", "" + i)); } writer.Dispose(); ir = DirectoryReader.Open(dir); Assert.AreEqual(49, ir.NumDocs); ir.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy(3))); Assert.AreEqual(49, writer.NumDocs); writer.ForceMergeDeletes(); writer.Dispose(); ir = DirectoryReader.Open(dir); Assert.AreEqual(49, ir.MaxDoc); Assert.AreEqual(49, ir.NumDocs); ir.Dispose(); dir.Dispose(); }
public virtual void TestRandom() { // LUCENENET specific - disable the test if asserts are not enabled AssumeTrue("This test requires asserts to be enabled.", Debugging.AssertsEnabled); int numThreads = 1 + Random.Next(8); int numDocumentsToIndex = 50 + AtLeast(70); AtomicInt32 numDocs = new AtomicInt32(numDocumentsToIndex); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); iwc.SetFlushPolicy(flushPolicy); int numDWPT = 1 + Random.Next(8); DocumentsWriterPerThreadPool threadPool = new DocumentsWriterPerThreadPool(numDWPT); iwc.SetIndexerThreadPool(threadPool); IndexWriter writer = new IndexWriter(dir, iwc); flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy; DocumentsWriter docsWriter = writer.DocsWriter; Assert.IsNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.flushControl; Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init"); IndexThread[] threads = new IndexThread[numThreads]; for (int x = 0; x < threads.Length; x++) { threads[x] = new IndexThread(numDocs, writer, lineDocFile, true); threads[x].Start(); } for (int x = 0; x < threads.Length; x++) { threads[x].Join(); } Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due"); Assert.AreEqual(numDocumentsToIndex, writer.NumDocs); Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc); if (flushPolicy.FlushOnRAM && !flushPolicy.FlushOnDocCount && !flushPolicy.FlushOnDeleteTerms) { long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0); Assert.IsTrue(flushPolicy.peakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark"); if (flushPolicy.hasMarkedPending) { assertTrue("max: " + maxRAMBytes + " " + flushControl.peakActiveBytes, maxRAMBytes <= flushControl.peakActiveBytes); } } AssertActiveBytesAfter(flushControl); writer.Commit(); Assert.AreEqual(0, flushControl.ActiveBytes); IndexReader r = DirectoryReader.Open(dir); Assert.AreEqual(numDocumentsToIndex, r.NumDocs); Assert.AreEqual(numDocumentsToIndex, r.MaxDoc); if (!flushPolicy.FlushOnRAM) { assertFalse("never stall if we don't flush on RAM", docsWriter.flushControl.stallControl.WasStalled); assertFalse("never block if we don't flush on RAM", docsWriter.flushControl.stallControl.HasBlocked); } r.Dispose(); writer.Dispose(); dir.Dispose(); }
private void CloseIndexReader(IndexReader myReader) { /* IndexReader exposes Dispose() rather than Close() in Lucene.NET 4.8; a single Dispose() call releases the reader. */ myReader?.Dispose(); }
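Helpers like the one above are only needed when the reader's lifetime spans multiple members; when a reader is opened and consumed in one place, a using block (as several tests above already do) is the simpler way to guarantee disposal. A minimal sketch, assuming a Lucene.NET 4.8 index already exists in dir:

// Minimal sketch: open, use, and deterministically dispose a reader.
using Lucene.Net.Index;
using Lucene.Net.Store;

public static class ReaderUsageSketch
{
    public static int CountLiveDocs(Directory dir)
    {
        using (IndexReader reader = DirectoryReader.Open(dir))
        {
            return reader.NumDocs;   // reader.Dispose() runs on exiting the block
        }
    }
}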
// Runs test, with multiple threads, using the specific // failure to trigger an IOException public virtual void TestMultipleThreadsFailure(Func <IConcurrentMergeScheduler> newScheduler, Failure failure) { int NUM_THREADS = 3; for (int iter = 0; iter < 2; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } MockDirectoryWrapper dir = NewMockDirectory(); var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(2) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(4)); IndexWriter writer = new IndexWriter(dir, config); var scheduler = config.mergeScheduler as IConcurrentMergeScheduler; if (scheduler != null) { scheduler.SetSuppressExceptions(); } IndexerThread[] threads = new IndexerThread[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { threads[i] = new IndexerThread(writer, true, NewField); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Start(); } Thread.Sleep(10); dir.FailOn(failure); failure.SetDoFail(); for (int i = 0; i < NUM_THREADS; i++) { threads[i].Join(); Assert.IsTrue(threads[i].Error == null, "hit unexpected Throwable"); } bool success = false; try { writer.Dispose(false); success = true; } catch (IOException) { failure.ClearDoFail(); writer.Dispose(false); } if (VERBOSE) { Console.WriteLine("TEST: success=" + success); } if (success) { IndexReader reader = DirectoryReader.Open(dir); IBits delDocs = MultiFields.GetLiveDocs(reader); for (int j = 0; j < reader.MaxDoc; j++) { if (delDocs == null || !delDocs.Get(j)) { reader.Document(j); reader.GetTermVectors(j); } } reader.Dispose(); } dir.Dispose(); } }
public virtual void Test() { MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors")); dir.PreventDoubleWrite = false; double rate = Random.NextDouble() * 0.01; //System.out.println("rate=" + rate); dir.RandomIOExceptionRateOnOpen = rate; int iters = AtLeast(20); LineFileDocs docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues); IndexReader r = null; DirectoryReader r2 = null; bool any = false; MockDirectoryWrapper dirCopy = null; int lastNumDocs = 0; for (int iter = 0; iter < iters; iter++) { IndexWriter w = null; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } try { MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); if (VERBOSE) { // Do this ourselves instead of relying on LTC so // we see incrementing messageID: iwc.SetInfoStream(new TextWriterInfoStream(Console.Out)); } var ms = iwc.MergeScheduler; if (ms is IConcurrentMergeScheduler) { ((IConcurrentMergeScheduler)ms).SetSuppressExceptions(); } w = new IndexWriter(dir, iwc); if (r != null && Random.Next(5) == 3) { if (Random.NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: addIndexes IR[]"); } w.AddIndexes(new IndexReader[] { r }); } else { if (VERBOSE) { Console.WriteLine("TEST: addIndexes Directory[]"); } w.AddIndexes(new Directory[] { dirCopy }); } } else { if (VERBOSE) { Console.WriteLine("TEST: addDocument"); } w.AddDocument(docs.NextDoc()); } dir.RandomIOExceptionRateOnOpen = 0.0; w.Dispose(); w = null; // NOTE: this is O(N^2)! Only enable for temporary debugging: //dir.setRandomIOExceptionRateOnOpen(0.0); //TestUtil.CheckIndex(dir); //dir.setRandomIOExceptionRateOnOpen(rate); // Verify numDocs only increases, to catch IndexWriter // accidentally deleting the index: dir.RandomIOExceptionRateOnOpen = 0.0; Assert.IsTrue(DirectoryReader.IndexExists(dir)); if (r2 == null) { r2 = DirectoryReader.Open(dir); } else { DirectoryReader r3 = DirectoryReader.OpenIfChanged(r2); if (r3 != null) { r2.Dispose(); r2 = r3; } } Assert.IsTrue(r2.NumDocs >= lastNumDocs, "before=" + lastNumDocs + " after=" + r2.NumDocs); lastNumDocs = r2.NumDocs; //System.out.println("numDocs=" + lastNumDocs); dir.RandomIOExceptionRateOnOpen = rate; any = true; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": success"); } } catch (IOException ioe) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": exception"); Console.WriteLine(ioe.ToString()); Console.Write(ioe.StackTrace); } if (w != null) { // NOTE: leave random IO exceptions enabled here, // to verify that rollback does not try to write // anything: w.Rollback(); } } if (any && r == null && Random.NextBoolean()) { // Make a copy of a non-empty index so we can use // it to addIndexes later: dir.RandomIOExceptionRateOnOpen = 0.0; r = DirectoryReader.Open(dir); dirCopy = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors.copy")); ISet <string> files = new JCG.HashSet <string>(); foreach (string file in dir.ListAll()) { dir.Copy(dirCopy, file, file, IOContext.DEFAULT); files.Add(file); } dirCopy.Sync(files); // Have IW kiss the dir so we remove any leftover // files ... 
// ... we can easily have leftover files at the time we take a copy because we are holding open a reader: (new IndexWriter(dirCopy, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)))).Dispose(); dirCopy.RandomIOExceptionRate = rate; dir.RandomIOExceptionRateOnOpen = rate; } } if (r2 != null) { r2.Dispose(); } if (r != null) { r.Dispose(); dirCopy.Dispose(); } dir.Dispose(); }
public virtual void TestNoWaitClose() { Directory directory = NewDirectory(); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.IsTokenized = false; Field idField = NewField("id", "", customType); doc.Add(idField); for (int pass = 0; pass < 2; pass++) { if (Verbose) { Console.WriteLine("TEST: pass=" + pass); } /* The writer construction and the opening of the iteration loop were masked as "******" in the source; the writer config and the loop bound below are a reconstructed sketch, not the original code. */ IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy())); for (int iter = 0; iter < 10; iter++) { if (Verbose) { Console.WriteLine("TEST: iter=" + iter); } for (int j = 0; j < 199; j++) { idField.SetStringValue(Convert.ToString(iter * 201 + j)); writer.AddDocument(doc); } int delID = iter * 199; for (int j = 0; j < 20; j++) { writer.DeleteDocuments(new Term("id", Convert.ToString(delID))); delID += 5; } // Force a bunch of merge threads to kick off so we // stress out aborting them on close: ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor = 2; IndexWriter finalWriter = writer; List <Exception> failure = new List <Exception>(); ThreadJob t1 = new ThreadAnonymousClass(this, doc, finalWriter, failure); if (failure.Count > 0) { throw failure[0]; } t1.Start(); writer.Dispose(false); t1.Join(); // Make sure reader can read IndexReader reader = DirectoryReader.Open(directory); reader.Dispose(); // Reopen writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetMergePolicy(NewLogMergePolicy())); } writer.Dispose(); } directory.Dispose(); }
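The ThreadAnonymousClass used above is declared outside this snippet. A sketch of the shape such a thread is assumed to have: it keeps adding the shared document until the writer is disposed underneath it, collecting any unexpected exception for the test to rethrow.

// Assumed shape only; the real helper may differ. ThreadJob is the
// J2N/Lucene.NET test thread base class already used above.
private sealed class AddDocsUntilClosedThread : ThreadJob
{
    private readonly IndexWriter writer;
    private readonly Document doc;
    private readonly IList<Exception> failures;

    public AddDocsUntilClosedThread(IndexWriter writer, Document doc, IList<Exception> failures)
    {
        this.writer = writer;
        this.doc = doc;
        this.failures = failures;
    }

    public override void Run()
    {
        try
        {
            while (true)
            {
                writer.AddDocument(doc);   // races against writer.Dispose(false)
            }
        }
        catch (ObjectDisposedException)
        {
            // expected: the writer was closed while we were still adding
        }
        catch (Exception e)
        {
            failures.Add(e);   // anything else is a real failure
        }
    }
}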
public virtual void TestArbitraryFields() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); int NUM_DOCS = AtLeast(27); if (VERBOSE) { Console.WriteLine("TEST: " + NUM_DOCS + " docs"); } int[] fieldsPerDoc = new int[NUM_DOCS]; int baseCount = 0; for (int docCount = 0; docCount < NUM_DOCS; docCount++) { int fieldCount = TestUtil.NextInt(Random(), 1, 17); fieldsPerDoc[docCount] = fieldCount - 1; int finalDocCount = docCount; if (VERBOSE) { Console.WriteLine("TEST: " + fieldCount + " fields in doc " + docCount); } int finalBaseCount = baseCount; baseCount += fieldCount - 1; w.AddDocument(new IterableAnonymousInnerClassHelper(this, fieldCount, finalDocCount, finalBaseCount)); } IndexReader r = w.Reader; w.Dispose(); IndexSearcher s = NewSearcher(r); int counter = 0; for (int id = 0; id < NUM_DOCS; id++) { if (VERBOSE) { Console.WriteLine("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter); } TopDocs hits = s.Search(new TermQuery(new Term("id", "" + id)), 1); Assert.AreEqual(1, hits.TotalHits); int docID = hits.ScoreDocs[0].Doc; Document doc = s.Doc(docID); int endCounter = counter + fieldsPerDoc[id]; while (counter < endCounter) { string name = "f" + counter; int fieldID = counter % 10; bool stored = (counter & 1) == 0 || fieldID == 3; bool binary = fieldID == 3; bool indexed = fieldID != 3; string stringValue; if (fieldID != 3 && fieldID != 9) { stringValue = "text " + counter; } else { stringValue = null; } // stored: if (stored) { IIndexableField f = doc.GetField(name); Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter); if (binary) { Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter); BytesRef b = f.GetBinaryValue(); Assert.IsNotNull(b); Assert.AreEqual(10, b.Length); for (int idx = 0; idx < 10; idx++) { Assert.AreEqual((byte)(idx + counter), b.Bytes[b.Offset + idx]); } } else { Debug.Assert(stringValue != null); Assert.AreEqual(stringValue, f.GetStringValue()); } } if (indexed) { bool tv = counter % 2 == 1 && fieldID != 9; if (tv) { Terms tfv = r.GetTermVectors(docID).GetTerms(name); Assert.IsNotNull(tfv); TermsEnum termsEnum = tfv.GetIterator(null); Assert.AreEqual(new BytesRef("" + counter), termsEnum.Next()); Assert.AreEqual(1, termsEnum.TotalTermFreq); DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, dpEnum.Freq); Assert.AreEqual(1, dpEnum.NextPosition()); Assert.AreEqual(new BytesRef("text"), termsEnum.Next()); Assert.AreEqual(1, termsEnum.TotalTermFreq); dpEnum = termsEnum.DocsAndPositions(null, dpEnum); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, dpEnum.Freq); Assert.AreEqual(0, dpEnum.NextPosition()); Assert.IsNull(termsEnum.Next()); // TODO: offsets } else { Fields vectors = r.GetTermVectors(docID); Assert.IsTrue(vectors == null || vectors.GetTerms(name) == null); } BooleanQuery bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST); bq.Add(new TermQuery(new Term(name, "text")), Occur.MUST); TopDocs hits2 = s.Search(bq, 1); Assert.AreEqual(1, hits2.TotalHits); Assert.AreEqual(docID, hits2.ScoreDocs[0].Doc); bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST); bq.Add(new TermQuery(new Term(name, "" + counter)), Occur.MUST); TopDocs hits3 = s.Search(bq, 1); Assert.AreEqual(1, hits3.TotalHits); Assert.AreEqual(docID, hits3.ScoreDocs[0].Doc); } 
counter++; } } r.Dispose(); dir.Dispose(); }
private static void CloseQuietly(IndexReader reader) { try { if (reader != null) reader.Dispose(); } catch (Exception) { /* swallow: disposal here is best-effort */ } }
public virtual void TestFlushExceptions() { MockDirectoryWrapper directory = NewMockDirectory(); FailOnlyOnFlush failure = new FailOnlyOnFlush(this); directory.FailOn(failure); IndexWriter writer = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2)); Document doc = new Document(); Field idField = NewStringField("id", "", Field.Store.YES); doc.Add(idField); int extraCount = 0; for (int i = 0; i < 10; i++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + i); } for (int j = 0; j < 20; j++) { idField.SetStringValue(Convert.ToString(i * 20 + j)); writer.AddDocument(doc); } // must cycle here because sometimes the merge flushes // the doc we just added and so there's nothing to // flush, and we don't hit the exception while (true) { writer.AddDocument(doc); failure.SetDoFail(); try { writer.Flush(true, true); if (failure.HitExc) { Assert.Fail("failed to hit IOException"); } extraCount++; } catch (IOException ioe) { if (VERBOSE) { Console.WriteLine(ioe.StackTrace); } failure.ClearDoFail(); break; } } Assert.AreEqual(20 * (i + 1) + extraCount, writer.NumDocs); } writer.Dispose(); IndexReader reader = DirectoryReader.Open(directory); Assert.AreEqual(200 + extraCount, reader.NumDocs); reader.Dispose(); directory.Dispose(); }