WARNING: this API is new and experimental and is subject to change suddenly in the next release.
/// <summary>
/// Indexes 19 identical docs (stored field + term vectors with positions/offsets),
/// deletes one via an IndexReader, then verifies that CheckIndex reports a clean
/// index whose per-segment statistics reflect the deletion (18 live docs), and
/// that checking only segment "_0" also passes.
/// </summary>
public virtual void TestDeletedDocs()
{
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(2); // force multiple segments so the merge/check paths are exercised
    Document doc = new Document();
    doc.Add(new Field("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    for (int i = 0; i < 19; i++)
    {
        writer.AddDocument(doc);
    }
    writer.Close();

    // Delete one doc so the *Status doc counts below drop to 18.
    IndexReader reader = IndexReader.Open(dir);
    reader.DeleteDocument(5);
    reader.Close();

    // Capture CheckIndex's info stream so we can echo it only on failure.
    System.IO.MemoryStream bos = new System.IO.MemoryStream(1024);
    System.IO.StreamWriter infoStream = new System.IO.StreamWriter(bos);
    CheckIndex checker = new CheckIndex(dir);
    checker.SetInfoStream(infoStream);
    //checker.setInfoStream(System.out);
    CheckIndex.Status indexStatus = checker.CheckIndex_Renamed_Method();
    if (indexStatus.clean == false)
    {
        System.Console.Out.WriteLine("CheckIndex failed");
        // BUGFIX: the StreamWriter buffers its output; previously the MemoryStream
        // was read without flushing, so the diagnostic dump was empty/truncated.
        // Also decode with the writer's encoding (StreamWriter defaults to UTF-8)
        // instead of widening each byte to a char, which mangles multi-byte sequences.
        infoStream.Flush();
        System.Console.Out.WriteLine(System.Text.Encoding.UTF8.GetString(bos.GetBuffer(), 0, (int)bos.Length));
        Assert.Fail();
    }

    CheckIndex.Status.SegmentInfoStatus seg = (CheckIndex.Status.SegmentInfoStatus)indexStatus.segmentInfos[0];
    Assert.IsTrue(seg.openReaderPassed);
    Assert.IsNotNull(seg.diagnostics);

    Assert.IsNotNull(seg.fieldNormStatus);
    Assert.IsNull(seg.fieldNormStatus.error);
    Assert.AreEqual(1, seg.fieldNormStatus.totFields);

    // Single unique term "aaa"; freq counts all 19 postings, positions only the 18 live docs.
    Assert.IsNotNull(seg.termIndexStatus);
    Assert.IsNull(seg.termIndexStatus.error);
    Assert.AreEqual(1, seg.termIndexStatus.termCount);
    Assert.AreEqual(19, seg.termIndexStatus.totFreq);
    Assert.AreEqual(18, seg.termIndexStatus.totPos);

    Assert.IsNotNull(seg.storedFieldStatus);
    Assert.IsNull(seg.storedFieldStatus.error);
    Assert.AreEqual(18, seg.storedFieldStatus.docCount);
    Assert.AreEqual(18, seg.storedFieldStatus.totFields);

    Assert.IsNotNull(seg.termVectorStatus);
    Assert.IsNull(seg.termVectorStatus.error);
    Assert.AreEqual(18, seg.termVectorStatus.docCount);
    Assert.AreEqual(18, seg.termVectorStatus.totVectors);

    Assert.IsTrue(seg.diagnostics.Count > 0);

    // Checking just the first segment by name must also come back clean.
    System.Collections.IList onlySegments = new System.Collections.ArrayList();
    onlySegments.Add("_0");
    Assert.IsTrue(checker.CheckIndex_Renamed_Method(onlySegments).clean == true);
}
/// <summary>
/// Nightly-scale stress test: indexes more than Int32.MaxValue unique terms
/// (TERM_COUNT ≈ 2.2 billion spread over numDocs docs of TERMS_PER_DOC terms
/// each), then verifies a sample of saved terms is findable and that
/// CheckIndex reports a term count exceeding Int32.MaxValue.
/// Requires very large amounts of time and disk; presumably gated by a
/// nightly/monster test attribute outside this view — confirm before running.
/// </summary>
/// <param name="scheduler">merge scheduler injected by the NUnit ValueSource attribute</param>
public virtual void Test2BTerms_Mem([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
{
    // The PreFlex (3.x) codec cannot address this many terms.
    if ("Lucene3x".Equals(Codec.Default.Name))
    {
        throw new Exception("this test cannot run with PreFlex codec");
    }
    Console.WriteLine("Starting Test2B");
    long TERM_COUNT = ((long)int.MaxValue) + 100000000;

    int TERMS_PER_DOC = TestUtil.NextInt(Random(), 100000, 1000000);

    // Populated from the token stream after indexing; if still null when the
    // reader is opened, terms are re-discovered via FindTerms(r).
    IList<BytesRef> savedTerms = null;

    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));
    //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
    if (dir is MockDirectoryWrapper)
    {
        // Throttling would make this enormous run take even longer.
        ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }
    dir.CheckIndexOnClose = false; // don't double-checkindex

    if (true)
    {
        // Large RAM buffer and no auto-flush: minimize segment count for the huge run.
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .SetRAMBufferSizeMB(256.0)
            .SetMergeScheduler(scheduler)
            .SetMergePolicy(NewLogMergePolicy(false, 10))
            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

        MergePolicy mp = w.Config.MergePolicy;
        if (mp is LogByteSizeMergePolicy)
        {
            // 1 petabyte:
            ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
        }

        // One reusable doc fed by a synthetic token stream; docs-only, no norms,
        // to keep the index as small as possible.
        Documents.Document doc = new Documents.Document();
        MyTokenStream ts = new MyTokenStream(Random(), TERMS_PER_DOC);

        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
        customType.OmitNorms = true;
        Field field = new Field("field", ts, customType);
        doc.Add(field);
        //w.setInfoStream(System.out);
        int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

        Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
        Console.WriteLine("numDocs=" + numDocs);

        for (int i = 0; i < numDocs; i++)
        {
            long t0 = Environment.TickCount;
            w.AddDocument(doc);
            // Per-doc progress + elapsed ms; TickCount resolution is coarse but adequate here.
            Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec");
        }
        savedTerms = ts.SavedTerms;

        Console.WriteLine("TEST: full merge");
        w.ForceMerge(1);
        Console.WriteLine("TEST: close writer");
        w.Dispose();
    }

    Console.WriteLine("TEST: open reader");
    IndexReader r = DirectoryReader.Open(dir);
    if (savedTerms == null)
    {
        savedTerms = FindTerms(r);
    }
    int numSavedTerms = savedTerms.Count;
    // The last 10 saved terms have the highest ordinals — exercise the big-ord path first.
    IList<BytesRef> bigOrdTerms = new List<BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms));
    Console.WriteLine("TEST: test big ord terms...");
    TestSavedTerms(r, bigOrdTerms);
    Console.WriteLine("TEST: test all saved terms...");
    TestSavedTerms(r, savedTerms);
    r.Dispose();

    Console.WriteLine("TEST: now CheckIndex...");
    CheckIndex.Status status = TestUtil.CheckIndex(dir);
    long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;
    // The whole point: term count must have crossed the 32-bit boundary.
    Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);
    dir.Dispose();
    Console.WriteLine("TEST: done!");
}
/// <summary>
/// For every index in <c>unsupportedNames</c> (written by Lucene versions that
/// are no longer readable): unzips the bundled index, then verifies that both
/// <c>DirectoryReader.Open</c> and <c>IndexWriter</c> creation throw
/// <see cref="IndexFormatTooOldException"/> (with a path= in the writer's
/// message), and that CheckIndex reports the index as not clean while naming
/// that exception in its output.
/// </summary>
public virtual void TestUnsupportedOldIndexes()
{
    foreach (string unsupportedName in unsupportedNames)
    {
        if (Verbose)
        {
            Console.WriteLine("TEST: index " + unsupportedName);
        }

        // Extract the zipped legacy index into a fresh temp directory.
        DirectoryInfo oldIndexDir = CreateTempDir(unsupportedName);
        using (Stream dataFile = this.GetType().FindAndGetManifestResourceStream("unsupported." + unsupportedName + ".zip"))
        {
            TestUtil.Unzip(dataFile, oldIndexDir);
        }
        BaseDirectoryWrapper dir = NewFSDirectory(oldIndexDir);
        // don't checkindex, these are intentionally not supported
        dir.CheckIndexOnDispose = false;

        IndexReader reader = null;
        IndexWriter writer = null;

        // Opening a reader over the ancient format must be rejected.
        try
        {
            reader = DirectoryReader.Open(dir);
            Assert.Fail("DirectoryReader.open should not pass for " + unsupportedName);
        }
        catch (IndexFormatTooOldException)
        {
            // pass
        }
        finally
        {
            if (reader != null)
            {
                reader.Dispose();
            }
            reader = null;
        }

        // Opening a writer over the ancient format must be rejected too.
        try
        {
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            Assert.Fail("IndexWriter creation should not pass for " + unsupportedName);
        }
        catch (IndexFormatTooOldException e)
        {
            // pass
            if (Verbose)
            {
                Console.WriteLine("TEST: got expected exc:");
                Console.WriteLine(e.StackTrace);
            }
            // Make sure exc message includes a path=
            Assert.IsTrue(e.Message.IndexOf("path=\"", StringComparison.Ordinal) != -1, "got exc message: " + e.Message);
        }
        finally
        {
            // we should fail to open IW, and so it should be null when we get here.
            // However, if the test fails (i.e., IW did not fail on open), we need
            // to close IW. However, if merges are run, IW may throw
            // IndexFormatTooOldException, and we don't want to mask the Assert.Fail()
            // above, so close without waiting for merges.
            if (writer != null)
            {
                writer.Dispose(false);
            }
            writer = null;
        }

        // CheckIndex must flag the index and mention the too-old exception by name.
        StringBuilder diagnostics = new StringBuilder();
        CheckIndex indexChecker = new CheckIndex(dir);
        indexChecker.InfoStream = new StringWriter(diagnostics);
        CheckIndex.Status checkStatus = indexChecker.DoCheckIndex();
        Assert.IsFalse(checkStatus.Clean);
        indexChecker.InfoStream.Flush();
        Assert.IsTrue(diagnostics.ToString().Contains(typeof(IndexFormatTooOldException).Name));

        dir.Dispose();
    }
}
/// <summary>
/// Indexes 19 single-term docs ("aaa0".."aaa18", stored + full term vectors),
/// force-merges to one segment, deletes "aaa5", then verifies that CheckIndex
/// reports a clean index whose per-segment statistics reflect 18 live docs,
/// and that checking only segment "_0" also passes.
/// </summary>
public virtual void TestDeletedDocs()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2));

    for (int docNum = 0; docNum < 19; docNum++)
    {
        Document document = new Document();
        FieldType vectorType = new FieldType(TextField.TYPE_STORED)
        {
            StoreTermVectors = true,
            StoreTermVectorPositions = true,
            StoreTermVectorOffsets = true
        };
        document.Add(NewField("field", "aaa" + docNum, vectorType));
        writer.AddDocument(document);
    }
    writer.ForceMerge(1);
    writer.Commit();
    writer.DeleteDocuments(new Term("field", "aaa5"));
    writer.Dispose();

    // Capture CheckIndex output so it can be echoed only on failure
    // (or routed straight to the console when VERBOSE).
    ByteArrayOutputStream capturedOutput = new ByteArrayOutputStream(1024);
    CheckIndex checker = new CheckIndex(dir);
    checker.InfoStream = VERBOSE ? Console.Out : new StreamWriter(capturedOutput, Encoding.UTF8);
    CheckIndex.Status indexStatus = checker.DoCheckIndex();
    if (!indexStatus.Clean)
    {
        Console.WriteLine("CheckIndex failed");
        checker.FlushInfoStream();
        Console.WriteLine(capturedOutput.ToString());
        Assert.Fail();
    }

    CheckIndex.Status.SegmentInfoStatus segment = indexStatus.SegmentInfos[0];
    Assert.IsTrue(segment.OpenReaderPassed);
    Assert.IsNotNull(segment.Diagnostics);

    Assert.IsNotNull(segment.FieldNormStatus);
    Assert.IsNull(segment.FieldNormStatus.Error);
    Assert.AreEqual(1, segment.FieldNormStatus.TotFields);

    // 19 unique terms minus the one deleted doc's term -> 18 everywhere.
    Assert.IsNotNull(segment.TermIndexStatus);
    Assert.IsNull(segment.TermIndexStatus.Error);
    Assert.AreEqual(18, segment.TermIndexStatus.TermCount);
    Assert.AreEqual(18, segment.TermIndexStatus.TotFreq);
    Assert.AreEqual(18, segment.TermIndexStatus.TotPos);

    Assert.IsNotNull(segment.StoredFieldStatus);
    Assert.IsNull(segment.StoredFieldStatus.Error);
    Assert.AreEqual(18, segment.StoredFieldStatus.DocCount);
    Assert.AreEqual(18, segment.StoredFieldStatus.TotFields);

    Assert.IsNotNull(segment.TermVectorStatus);
    Assert.IsNull(segment.TermVectorStatus.Error);
    Assert.AreEqual(18, segment.TermVectorStatus.DocCount);
    Assert.AreEqual(18, segment.TermVectorStatus.TotVectors);

    Assert.IsTrue(segment.Diagnostics.Count > 0);

    // Checking just the first segment by name must also come back clean.
    IList<string> onlySegments = new List<string> { "_0" };
    Assert.IsTrue(checker.DoCheckIndex(onlySegments).Clean);
    dir.Dispose();
}
/// <summary>
/// For every index in <c>UnsupportedNames</c> (written by Lucene versions that
/// are no longer readable): unzips the bundled index, then verifies that both
/// <c>DirectoryReader.Open</c> and <c>IndexWriter</c> creation throw
/// <see cref="IndexFormatTooOldException"/> (with a path= in the writer's
/// message), and that CheckIndex reports the index as not clean while naming
/// that exception in its output.
/// </summary>
public virtual void TestUnsupportedOldIndexes()
{
    for (int i = 0; i < UnsupportedNames.Length; i++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: index " + UnsupportedNames[i]);
        }
        DirectoryInfo oldIndexDir = CreateTempDir(UnsupportedNames[i]);
        TestUtil.Unzip(GetDataFile("unsupported." + UnsupportedNames[i] + ".zip"), oldIndexDir);
        BaseDirectoryWrapper dir = NewFSDirectory(oldIndexDir);
        // don't checkindex, these are intentionally not supported
        dir.CheckIndexOnClose = false;

        IndexReader reader = null;
        IndexWriter writer = null;

        // Opening a reader over the ancient format must be rejected.
        try
        {
            reader = DirectoryReader.Open(dir);
            Assert.Fail("DirectoryReader.open should not pass for " + UnsupportedNames[i]);
        }
        catch (IndexFormatTooOldException e)
        {
            // pass
        }
        finally
        {
            if (reader != null)
            {
                reader.Dispose();
            }
            reader = null;
        }

        // Opening a writer over the ancient format must be rejected too.
        try
        {
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Assert.Fail("IndexWriter creation should not pass for " + UnsupportedNames[i]);
        }
        catch (IndexFormatTooOldException e)
        {
            // pass
            if (VERBOSE)
            {
                Console.WriteLine("TEST: got expected exc:");
                Console.WriteLine(e.StackTrace);
            }
            // Make sure exc message includes a path=
            // (Ordinal comparison: this is a machine-readable marker, not UI text.)
            Assert.IsTrue(e.Message.IndexOf("path=\"", StringComparison.Ordinal) != -1, "got exc message: " + e.Message);
        }
        finally
        {
            // we should fail to open IW, and so it should be null when we get here.
            // However, if the test fails (i.e., IW did not fail on open), we need
            // to close IW. However, if merges are run, IW may throw
            // IndexFormatTooOldException, and we don't want to mask the Assert.Fail()
            // above, so close without waiting for merges.
            if (writer != null)
            {
                writer.Dispose(false);
            }
            writer = null;
        }

        MemoryStream bos = new MemoryStream(1024);
        CheckIndex checker = new CheckIndex(dir);
        // BUGFIX: the previous code passed bos.ToString() (which is just the type
        // name "System.IO.MemoryStream") to the StreamWriter(string path, ...)
        // overload, so the CheckIndex output went to a file on disk, and the final
        // assertion read the type name instead of the captured output — it could
        // never find the exception name. Wrap the stream itself, flush before
        // reading, and decode the captured bytes.
        StreamWriter infoWriter = new StreamWriter(bos, IOUtils.CHARSET_UTF_8);
        checker.InfoStream = infoWriter;
        CheckIndex.Status indexStatus = checker.DoCheckIndex();
        Assert.IsFalse(indexStatus.Clean);
        infoWriter.Flush();
        string checkOutput = Encoding.UTF8.GetString(bos.ToArray());
        Assert.IsTrue(checkOutput.Contains(typeof(IndexFormatTooOldException).Name));
        dir.Dispose();
    }
}
/// <summary>
/// Nightly-scale stress test: indexes more than Int32.MaxValue unique terms
/// (TERM_COUNT ≈ 2.2 billion spread over numDocs docs of TERMS_PER_DOC terms
/// each), then verifies a sample of saved terms is findable and that
/// CheckIndex reports a term count exceeding Int32.MaxValue.
/// Requires very large amounts of time and disk; presumably gated by a
/// nightly/monster test attribute outside this view — confirm before running.
/// </summary>
public virtual void Test2BTerms_Mem()
{
    // The PreFlex (3.x) codec cannot address this many terms.
    if ("Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal))
    {
        throw RuntimeException.Create("this test cannot run with PreFlex codec");
    }
    Console.WriteLine("Starting Test2B");
    long TERM_COUNT = ((long)int.MaxValue) + 100000000;

    int TERMS_PER_DOC = TestUtil.NextInt32(Random, 100000, 1000000);

    // Populated from the token stream after indexing; if still null when the
    // reader is opened, terms are re-discovered via FindTerms(r).
    IList<BytesRef> savedTerms = null;

    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));
    //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
    if (dir is MockDirectoryWrapper)
    {
        // Throttling would make this enormous run take even longer.
        ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
    }
    dir.CheckIndexOnDispose = false; // don't double-checkindex

    if (true)
    {
        // Large RAM buffer and no auto-flush: minimize segment count for the huge run.
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .SetRAMBufferSizeMB(256.0)
            .SetMergeScheduler(new ConcurrentMergeScheduler())
            .SetMergePolicy(NewLogMergePolicy(false, 10))
            .SetOpenMode(OpenMode.CREATE));

        MergePolicy mp = w.Config.MergePolicy;
        if (mp is LogByteSizeMergePolicy)
        {
            // 1 petabyte:
            ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
        }

        // One reusable doc fed by a synthetic token stream; docs-only, no norms,
        // to keep the index as small as possible.
        Documents.Document doc = new Documents.Document();
        MyTokenStream ts = new MyTokenStream(Random, TERMS_PER_DOC);

        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.IndexOptions = IndexOptions.DOCS_ONLY;
        customType.OmitNorms = true;
        Field field = new Field("field", ts, customType);
        doc.Add(field);
        //w.setInfoStream(System.out);
        int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

        Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
        Console.WriteLine("numDocs=" + numDocs);

        for (int i = 0; i < numDocs; i++)
        {
            long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            w.AddDocument(doc);
            // Per-doc progress + elapsed ms.
            Console.WriteLine(i + " of " + numDocs + " " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
        }
        savedTerms = ts.savedTerms;

        Console.WriteLine("TEST: full merge");
        w.ForceMerge(1);
        Console.WriteLine("TEST: close writer");
        w.Dispose();
    }

    Console.WriteLine("TEST: open reader");
    IndexReader r = DirectoryReader.Open(dir);
    if (savedTerms is null)
    {
        savedTerms = FindTerms(r);
    }
    int numSavedTerms = savedTerms.Count;
    // The last 10 saved terms have the highest ordinals — exercise the big-ord path first.
    IList<BytesRef> bigOrdTerms = new JCG.List<BytesRef>(savedTerms.GetView(numSavedTerms - 10, 10)); // LUCENENET: Converted end index to length
    Console.WriteLine("TEST: test big ord terms...");
    TestSavedTerms(r, bigOrdTerms);
    Console.WriteLine("TEST: test all saved terms...");
    TestSavedTerms(r, savedTerms);
    r.Dispose();

    Console.WriteLine("TEST: now CheckIndex...");
    CheckIndex.Status status = TestUtil.CheckIndex(dir);
    long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;
    // The whole point: term count must have crossed the 32-bit boundary.
    Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);
    dir.Dispose();
    Console.WriteLine("TEST: done!");
}