public virtual void TestKeepLastNDeletionPolicyWithCreates()
{
    int N = 10;

    for (int pass = 0; pass < 4; pass++)
    {
        bool autoCommit = pass < 2;
        bool useCompoundFile = (pass % 2) > 0;

        KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
        writer.SetMaxBufferedDocs(10);
        writer.SetUseCompoundFile(useCompoundFile);
        writer.Close();
        Term searchTerm = new Term("content", "aaa");
        Query query = new TermQuery(searchTerm);

        for (int i = 0; i < N + 1; i++)
        {
            writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
            writer.SetMaxBufferedDocs(10);
            writer.SetUseCompoundFile(useCompoundFile);
            for (int j = 0; j < 17; j++)
            {
                AddDoc(writer);
            }
            // This is a commit when autoCommit=false:
            writer.Close();
            IndexReader reader = IndexReader.Open(dir, policy);
            reader.DeleteDocument(3);
            reader.SetNorm(5, "content", 2.0F);
            IndexSearcher searcher = new IndexSearcher(reader);
            ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(16, hits.Length);
            // This is a commit when autoCommit=false:
            reader.Close();
            searcher.Close();

            writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
            // This will not commit: there are no changes
            // pending because we opened for "create":
            writer.Close();
        }

        Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit);
        if (!autoCommit)
        {
            Assert.AreEqual(3 * (N + 1), policy.numOnCommit);
        }

        IndexSearcher searcher2 = new IndexSearcher(dir);
        ScoreDoc[] hits2 = searcher2.Search(query, null, 1000).scoreDocs;
        Assert.AreEqual(0, hits2.Length);

        // Simplistic check: just verify that only the past N segments_N files
        // still exist, and that a reader can be opened on each:
        long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
        dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
        int expectedCount = 0;
        for (int i = 0; i < N + 1; i++)
        {
            try
            {
                IndexReader reader = IndexReader.Open(dir);

                // Work backwards through the commits to compute what the
                // expected doc count should be. Only check this in the
                // autoCommit=false case:
                if (!autoCommit)
                {
                    searcher2 = new IndexSearcher(reader);
                    hits2 = searcher2.Search(query, null, 1000).scoreDocs;
                    Assert.AreEqual(expectedCount, hits2.Length);
                    searcher2.Close();
                    if (expectedCount == 0)
                    {
                        expectedCount = 16;
                    }
                    else if (expectedCount == 16)
                    {
                        expectedCount = 17;
                    }
                    else if (expectedCount == 17)
                    {
                        expectedCount = 0;
                    }
                }
                reader.Close();
                if (i == N)
                {
                    Assert.Fail("should have failed on commits before last " + N);
                }
            }
            catch (System.IO.IOException)
            {
                // Expected only once we step back past the last N commits;
                // rethrow (preserving the stack trace) otherwise:
                if (i != N)
                {
                    throw;
                }
            }
            if (i < N)
            {
                dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
            }
            gen--;
        }
        dir.Close();
    }
}
/*
 * Returns all files referenced by this SegmentInfo. The
 * returned list is cached internally, so callers must not
 * modify it.
 */
public IList<string> Files()
{
    if (files != null)
    {
        // Already cached:
        return files;
    }

    List<string> fileList = new List<string>();

    bool useCompoundFile = GetUseCompoundFile();

    if (useCompoundFile)
    {
        fileList.Add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
    }
    else
    {
        System.String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS;
        for (int i = 0; i < exts.Length; i++)
        {
            AddIfExists(fileList, name + "." + exts[i]);
        }
    }

    if (docStoreOffset != -1)
    {
        // We are sharing doc stores (stored fields, term
        // vectors) with other segments:
        System.Diagnostics.Debug.Assert(docStoreSegment != null);
        if (docStoreIsCompoundFile)
        {
            fileList.Add(docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
        }
        else
        {
            System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
            for (int i = 0; i < exts.Length; i++)
            {
                AddIfExists(fileList, docStoreSegment + "." + exts[i]);
            }
        }
    }
    else if (!useCompoundFile)
    {
        // We are not sharing, and these files were not
        // included in the compound file:
        System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
        for (int i = 0; i < exts.Length; i++)
        {
            AddIfExists(fileList, name + "." + exts[i]);
        }
    }

    System.String delFileName = IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
    if (delFileName != null && (delGen >= YES || dir.FileExists(delFileName)))
    {
        fileList.Add(delFileName);
    }

    // Careful logic for norms files:
    if (normGen != null)
    {
        for (int i = 0; i < normGen.Length; i++)
        {
            long gen = normGen[i];
            if (gen >= YES)
            {
                // Definitely a separate norm file, with generation:
                fileList.Add(IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
            }
            else if (NO == gen)
            {
                // No separate norms, but maybe plain norms
                // in the non-compound-file case:
                if (!hasSingleNormFile && !useCompoundFile)
                {
                    System.String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
                    if (dir.FileExists(fileName))
                    {
                        fileList.Add(fileName);
                    }
                }
            }
            else if (CHECK_DIR == gen)
            {
                // Pre-2.1: we have to check file existence
                System.String fileName = null;
                if (useCompoundFile)
                {
                    fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i;
                }
                else if (!hasSingleNormFile)
                {
                    fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
                }
                if (fileName != null && dir.FileExists(fileName))
                {
                    fileList.Add(fileName);
                }
            }
        }
    }
    else if (preLockless || (!hasSingleNormFile && !useCompoundFile))
    {
        // Pre-2.1: we have to scan the dir to find all
        // matching _X.sN/_X.fN files for our segment:
        System.String prefix;
        if (useCompoundFile)
        {
            prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION;
        }
        else
        {
            prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION;
        }
        int prefixLength = prefix.Length;
        System.String[] allFiles = dir.ListAll();
        IndexFileNameFilter filter = IndexFileNameFilter.GetFilter();
        for (int i = 0; i < allFiles.Length; i++)
        {
            System.String fileName = allFiles[i];
            if (filter.Accept(null, fileName) && fileName.Length > prefixLength && System.Char.IsDigit(fileName[prefixLength]) && fileName.StartsWith(prefix))
            {
                fileList.Add(fileName);
            }
        }
    }
    files = fileList;
    return files;
}
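// Usage sketch (not from the original source): because Files() caches its
// result per SegmentInfo, a file deleter or deletion policy can cheaply
// re-ask every segment for its referenced files when computing which files a
// commit point keeps alive. CollectReferencedFiles is a hypothetical helper;
// SegmentInfos.Info(int) is the real per-segment accessor.
private static System.Collections.Generic.HashSet<string> CollectReferencedFiles(SegmentInfos segmentInfos)
{
    var referenced = new System.Collections.Generic.HashSet<string>();
    for (int i = 0; i < segmentInfos.Count; i++)
    {
        // Files() returns the SegmentInfo's cached list; read it, don't mutate it.
        foreach (string fileName in segmentInfos.Info(i).Files())
        {
            referenced.Add(fileName);
        }
    }
    return referenced;
}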
public virtual void TestKeepLastNDeletionPolicy()
{
    int N = 5;

    for (int pass = 0; pass < 4; pass++)
    {
        bool autoCommit = pass < 2;
        bool useCompoundFile = (pass % 2) > 0;

        Directory dir = new RAMDirectory();
        KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);
        for (int j = 0; j < N + 1; j++)
        {
            IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
            writer.SetMaxBufferedDocs(10);
            writer.SetUseCompoundFile(useCompoundFile);
            for (int i = 0; i < 17; i++)
            {
                AddDoc(writer);
            }
            writer.Optimize();
            writer.Close();
        }

        Assert.IsTrue(policy.numDelete > 0);
        Assert.AreEqual(N + 1, policy.numOnInit);
        if (autoCommit)
        {
            Assert.IsTrue(policy.numOnCommit > 1);
        }
        else
        {
            Assert.AreEqual(N + 1, policy.numOnCommit);
        }

        // Simplistic check: just verify that only the past N segments_N files
        // still exist, and that a reader can be opened on each:
        dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
        long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
        for (int i = 0; i < N + 1; i++)
        {
            try
            {
                IndexReader reader = IndexReader.Open(dir);
                reader.Close();
                if (i == N)
                {
                    Assert.Fail("should have failed on commits prior to last " + N);
                }
            }
            catch (System.IO.IOException)
            {
                // Expected only once we step back past the last N commits;
                // rethrow (preserving the stack trace) otherwise:
                if (i != N)
                {
                    throw;
                }
            }
            if (i < N)
            {
                dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
            }
            gen--;
        }
        dir.Close();
    }
}
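// The KeepLastNDeletionPolicy helper exercised by the tests above is not
// shown in this section. Below is a minimal sketch of what such a policy
// might look like, assuming the 2.x-era IndexDeletionPolicy contract
// (OnInit/OnCommit receive the commit points oldest-first as a non-generic
// IList, and IndexCommit.Delete() marks a commit for removal). The class
// name, constructor, and counter fields mirror the test assertions but are
// otherwise hypothetical:
class KeepLastNDeletionPolicySketch : IndexDeletionPolicy
{
    private readonly int numToKeep;
    internal int numOnInit;
    internal int numOnCommit;
    internal int numDelete;

    public KeepLastNDeletionPolicySketch(int numToKeep)
    {
        this.numToKeep = numToKeep;
    }

    public void OnInit(System.Collections.IList commits)
    {
        numOnInit++;
        DoDeletes(commits);
    }

    public void OnCommit(System.Collections.IList commits)
    {
        numOnCommit++;
        DoDeletes(commits);
    }

    private void DoDeletes(System.Collections.IList commits)
    {
        // Delete all but the newest numToKeep commit points; the list is
        // assumed to be ordered oldest-first:
        int size = commits.Count;
        for (int i = 0; i < size - numToKeep; i++)
        {
            ((IndexCommit) commits[i]).Delete();
            numDelete++;
        }
    }
}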
public virtual void TestKeepAllDeletionPolicy()
{
    for (int pass = 0; pass < 4; pass++)
    {
        bool autoCommit = pass < 2;
        bool useCompoundFile = (pass % 2) > 0;

        // Never deletes a commit
        KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);

        Directory dir = new RAMDirectory();
        policy.dir = dir;

        IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
        writer.SetMaxBufferedDocs(10);
        writer.SetUseCompoundFile(useCompoundFile);
        writer.SetMergeScheduler(new SerialMergeScheduler());
        for (int i = 0; i < 107; i++)
        {
            AddDoc(writer);
            if (autoCommit && i % 10 == 0)
            {
                writer.Commit();
            }
        }
        writer.Close();

        writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
        writer.SetUseCompoundFile(useCompoundFile);
        writer.Optimize();
        writer.Close();

        Assert.AreEqual(2, policy.numOnInit);
        if (!autoCommit)
        {
            // If we are not auto committing, then there should
            // be exactly 2 commits (one per Close() above):
            Assert.AreEqual(2, policy.numOnCommit);
        }

        // Test ListCommits
        System.Collections.ICollection commits = IndexReader.ListCommits(dir);
        if (!autoCommit)
        {
            // 1 from opening writer + 2 from closing writer
            Assert.AreEqual(3, commits.Count);
        }
        else
        {
            // 1 from opening writer + 2 from closing writer +
            // 11 from calling writer.Commit() explicitly above
            Assert.AreEqual(14, commits.Count);
        }

        System.Collections.IEnumerator it = commits.GetEnumerator();
        // Make sure we can open a reader on each commit:
        while (it.MoveNext())
        {
            IndexCommit commit = (IndexCommit) it.Current;
            IndexReader r = IndexReader.Open(commit, null);
            r.Close();
        }

        // Simplistic check: just verify that all segments_N files still
        // exist, and that a reader can be opened on each:
        dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
        long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
        while (gen > 0)
        {
            IndexReader reader = IndexReader.Open(dir);
            reader.Close();
            dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
            gen--;

            if (gen > 0)
            {
                // We just removed a commit point, which should have orphaned
                // at least one index file. Open & close a writer and assert
                // that it actually removed something:
                int preCount = dir.ListAll().Length;
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.LIMITED);
                writer.Close();
                int postCount = dir.ListAll().Length;
                Assert.IsTrue(postCount < preCount);
            }
        }
        dir.Close();
    }
}
public virtual void TestExpirationTimeDeletionPolicy()
{
    double SECONDS = 2.0;

    bool autoCommit = false;
    bool useCompoundFile = true;

    Directory dir = new RAMDirectory();
    ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(this, dir, SECONDS);
    IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
    writer.SetUseCompoundFile(useCompoundFile);
    writer.Close();

    long lastDeleteTime = 0;
    for (int i = 0; i < 7; i++)
    {
        // Record the last time the writer performed deletes of
        // past commits:
        lastDeleteTime = DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond;
        writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
        writer.SetUseCompoundFile(useCompoundFile);
        for (int j = 0; j < 17; j++)
        {
            AddDoc(writer);
        }
        writer.Close();

        // Make sure to sleep long enough so that some commit
        // points will be deleted:
        System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * (int) (1000.0 * (SECONDS / 5.0))));
    }

    // First, make sure the policy in fact deleted something:
    Assert.IsTrue(policy.numDelete > 0, "no commits were deleted");

    // Then a simplistic check: verify that the segments_N files that still
    // exist are in fact within SECONDS seconds of the last one's mod time,
    // and that a reader can be opened on each:
    long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
    System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);
    dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
    while (gen > 0)
    {
        try
        {
            IndexReader reader = IndexReader.Open(dir);
            reader.Close();
            fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);
            long modTime = dir.FileModified(fileName);
            Assert.IsTrue(lastDeleteTime - modTime <= (SECONDS * 1000), "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted");
        }
        catch (System.IO.IOException)
        {
            // OK: we've reached a commit point that was deleted
            break;
        }
        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
        gen--;
    }
    dir.Close();
}
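// The ExpirationTimeDeletionPolicy helper is likewise not shown here. A
// minimal sketch of the idea, under the same assumed 2.x-era
// IndexDeletionPolicy contract as the sketch above: on each commit, delete
// every earlier commit point whose segments_N file is older than the newest
// commit's by more than expirationTimeSeconds. The class name, constructor,
// and the assumption that Directory.FileModified returns milliseconds are
// all hypothetical here:
class ExpirationTimeDeletionPolicySketch : IndexDeletionPolicy
{
    private readonly Directory dir;
    private readonly double expirationTimeSeconds;
    internal int numDelete;

    public ExpirationTimeDeletionPolicySketch(Directory dir, double expirationTimeSeconds)
    {
        this.dir = dir;
        this.expirationTimeSeconds = expirationTimeSeconds;
    }

    public void OnInit(System.Collections.IList commits)
    {
        OnCommit(commits);
    }

    public void OnCommit(System.Collections.IList commits)
    {
        IndexCommit lastCommit = (IndexCommit) commits[commits.Count - 1];

        // Any commit whose segments file is older than expirationTimeSeconds
        // relative to the newest commit is expired; the newest commit itself
        // is always kept:
        double expireTime = dir.FileModified(lastCommit.GetSegmentsFileName()) / 1000.0 - expirationTimeSeconds;
        foreach (IndexCommit commit in commits)
        {
            double modTime = dir.FileModified(commit.GetSegmentsFileName()) / 1000.0;
            if (commit != lastCommit && modTime < expireTime)
            {
                commit.Delete();
                numDelete++;
            }
        }
    }
}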
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new DisposableThreadLocal<StoredFieldsReader>(() => (StoredFieldsReader) fieldsReaderOrig.Clone());
    termVectorsLocal = new DisposableThreadLocal<TermVectorsReader>(() => (termVectorsReaderOrig == null) ? null : (TermVectorsReader) termVectorsReaderOrig.Clone());

    if (termsIndexDivisor == 0)
    {
        throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // Confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.
    bool success = false;

    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            cfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos;

        this.termsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat;
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = format.FieldsProducer(segmentReadState);
        if (Debugging.AssertsEnabled) Debugging.Assert(fields != null);

        // Ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda janky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms)
        {
            normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
            if (Debugging.AssertsEnabled) Debugging.Assert(normsProducer != null);
        }
        else
        {
            normsProducer = null;
        }

        fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

        if (fieldInfos.HasVectors) // open term vector files only as needed
        {
            termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            termVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        // If construction failed partway through, release anything opened so far:
        if (!success)
        {
            DecRef();
        }
    }
}
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
    termVectorsLocal = new AnonymousTermVectorsLocal(this);

    if (termsIndexDivisor == 0)
    {
        throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // Confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.

    bool success = false;

    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            cfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos;

        this.termsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat;
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = format.FieldsProducer(segmentReadState);
        Debug.Assert(fields != null);

        // Ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda janky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms)
        {
            normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
            Debug.Assert(normsProducer != null);
        }
        else
        {
            normsProducer = null;
        }

        // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException,
        // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could
        // this be needed because we are using unchecked??
#if !NETSTANDARD
        try
        {
#endif
            fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);
#if !NETSTANDARD
        }
#pragma warning disable 168
        catch (System.AccessViolationException ave)
#pragma warning restore 168
        {
        }
#endif

        if (fieldInfos.HasVectors) // open term vector files only as needed
        {
            termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            termVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        // If construction failed partway through, release anything opened so far:
        if (!success)
        {
            DecRef();
        }
    }
}