public override void TearDown() { base.TearDown(); readerA.Close(); readerB.Close(); readerX.Close(); }
private static RAMDirectory MakeEmptyIndex(int numDeletedDocs) { RAMDirectory d = new RAMDirectory(); IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED, null); for (int i = 0; i < numDeletedDocs; i++) { w.AddDocument(new Document(), null); } w.Commit(null); w.DeleteDocuments(null, new MatchAllDocsQuery()); w.Commit(null); if (0 < numDeletedDocs) { Assert.IsTrue(w.HasDeletions(null), "writer has no deletions"); } Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs"); Assert.AreEqual(0, w.NumDocs(null), "writer has non-deleted docs"); w.Close(); IndexReader r = IndexReader.Open((Directory)d, true, null); Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs"); r.Close(); return(d); }
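// Hedged usage sketch (not part of the original suite): how a test might consume the MakeEmptyIndex helper above. It only uses members already shown in these snippets; the test name is illustrative.
public virtual void TestEmptyIndexSketch() { RAMDirectory dir = MakeEmptyIndex(5); IndexReader r = IndexReader.Open((Directory)dir, true, null); Assert.AreEqual(5, r.NumDeletedDocs, "reader should see all five docs as deleted"); Assert.AreEqual(0, r.NumDocs(), "reader should see no live docs"); r.Close(); dir.Close(); }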
public virtual void Test() { IndexReader reader = null; try { reader = IndexReader.Open((Directory)directory, true, null); for (int i = 1; i <= numThreads; i++) { TestTermPositionVectors(reader, i); } } catch (System.IO.IOException ioe) { Assert.Fail(ioe.Message); } finally { if (reader != null) { try { /* close the opened reader */ reader.Close(); } catch (System.IO.IOException ioe) { System.Console.Error.WriteLine(ioe.StackTrace); } } } }
public virtual void TestRAMDirectory_Renamed() { Directory dir = FSDirectory.Open(indexDir); MockRAMDirectory ramDir = new MockRAMDirectory(dir); // close the underlying directory dir.Close(); // Check size Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes()); // open reader to test document count IndexReader reader = IndexReader.Open((Directory)ramDir, true, null); Assert.AreEqual(docsToAdd, reader.NumDocs()); // open searcher to check if all docs are there IndexSearcher searcher = new IndexSearcher(reader); // search for all documents for (int i = 0; i < docsToAdd; i++) { Document doc = searcher.Doc(i, null); Assert.IsTrue(doc.GetField("content") != null); } // cleanup reader.Close(); searcher.Close(); }
public virtual void TestVerifyIndex() { IndexReader reader = IndexReader.Open(mDirectory, true, null); Assert.AreEqual(8, reader.NumDocs()); reader.Close(); }
public virtual void TestCompressionTools() { IFieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.Compress(System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed)), Field.Store.YES); IFieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.CompressString(binaryValCompressed), Field.Store.YES); Document doc = new Document(); doc.Add(binaryFldCompressed); doc.Add(stringFldCompressed); /* add the doc to a ram index */ MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null); writer.AddDocument(doc, null); writer.Close(); /* open a reader and fetch the document */ IndexReader reader = IndexReader.Open((Directory)dir, false, null); Document docFromReader = reader.Document(0, null); Assert.IsTrue(docFromReader != null); /* fetch the binary compressed field and compare its content with the original one */ System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed", null)))); Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed)); Assert.IsTrue(CompressionTools.DecompressString(docFromReader.GetBinaryValue("stringCompressed", null)).Equals(binaryValCompressed)); reader.Close(); dir.Close(); }
private static void doSpellCheckerIndexing(string LuceneIndexDir, string SpellCheckerIndexDir) { try { // http://lucene.apache.org/java/2_2_0/api/org/apache/lucene/search/spell/SpellChecker.html FSDirectory spellCheckerIndexDir = FSDirectory.GetDirectory(SpellCheckerIndexDir, false); FSDirectory indexDir = FSDirectory.GetDirectory(LuceneIndexDir, false); SpellChecker.Net.Search.Spell.SpellChecker spellchecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellCheckerIndexDir); spellchecker.ClearIndex(); IndexReader r = IndexReader.Open(indexDir); try { // To index a field of a user index: Dictionary dict = new SpellChecker.Net.Search.Spell.LuceneDictionary(r, "title"); spellchecker.IndexDictionary(dict); } finally { r.Close(); } } catch (Exception ex) { Console.Write("Could not create spell-checking index: " + ex.Message); } }
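// Hedged follow-up sketch: once doSpellCheckerIndexing has run, suggestions can be read back from the same spell-checker index via SuggestSimilar (the call shape mirrors TestSpellchecker further below); the method name is illustrative.
private static string[] GetSpellingSuggestions(string SpellCheckerIndexDir, string word, int numSuggestions) { FSDirectory spellCheckerIndexDir = FSDirectory.GetDirectory(SpellCheckerIndexDir, false); SpellChecker.Net.Search.Spell.SpellChecker spellchecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellCheckerIndexDir); return spellchecker.SuggestSimilar(word, numSuggestions); }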
public virtual void TestNPESpanQuery() { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet <string>()), IndexWriter.MaxFieldLength.LIMITED, null); // Add documents AddDoc(writer, "1", "the big dogs went running to the market"); AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly"); // Commit writer.Close(); // Get searcher IndexReader reader = IndexReader.Open(dir, true, null); IndexSearcher searcher = new IndexSearcher(reader); // Control (make sure docs indexed) Assert.AreEqual(2, HitCount(searcher, "the")); Assert.AreEqual(1, HitCount(searcher, "cat")); Assert.AreEqual(1, HitCount(searcher, "dogs")); Assert.AreEqual(0, HitCount(searcher, "rabbit")); // This throws exception (it shouldn't) Assert.AreEqual(1, searcher.Search(CreateSpan(0, true, new SpanQuery[] { CreateSpan(4, false, "chased", "cat"), CreateSpan("ate") }), 10, null).TotalHits); reader.Close(); dir.Close(); }
public void CloseReader(IndexReader reader) { try { reader.Close(); } catch (System.IO.IOException ex) { //TODO: extract subReaders and close each one individually ReaderProviderHelper.Clean(reader); throw new SearchException("Unable to close multiReader", ex); } }
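// Hedged sketch of the TODO above: best-effort cleanup that closes each sub-reader individually when closing the multi-reader fails. GetSequentialSubReaders() is the Lucene.Net 3.x accessor; whether it is available in this codebase is an assumption.
private static void CloseSubReadersQuietly(IndexReader reader) { IndexReader[] subReaders = reader.GetSequentialSubReaders(); if (subReaders == null) { return; } foreach (IndexReader sub in subReaders) { try { sub.Close(); } catch (System.IO.IOException) { /* swallow: best-effort cleanup */ } } }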
public virtual void TestSetBufferSize() { System.IO.DirectoryInfo indexDir = new System.IO.DirectoryInfo(System.IO.Path.Combine(AppSettings.Get("tempDir", ""), "testSetBufferSize")); MockFSDirectory dir = new MockFSDirectory(indexDir, NewRandom()); try { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null); writer.UseCompoundFile = false; for (int i = 0; i < 37; i++) { Document doc = new Document(); doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc, null); } writer.Close(); dir.allIndexInputs.Clear(); IndexReader reader = IndexReader.Open((Directory)dir, false, null); Term aaa = new Term("content", "aaa"); Term bbb = new Term("content", "bbb"); Term ccc = new Term("content", "ccc"); Assert.AreEqual(37, reader.DocFreq(ccc, null)); reader.DeleteDocument(0, null); Assert.AreEqual(37, reader.DocFreq(aaa, null)); dir.tweakBufferSizes(); reader.DeleteDocument(4, null); Assert.AreEqual(reader.DocFreq(bbb, null), 37); dir.tweakBufferSizes(); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.Search(new TermQuery(bbb), null, 1000, null).ScoreDocs; dir.tweakBufferSizes(); Assert.AreEqual(35, hits.Length); dir.tweakBufferSizes(); hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000, null).ScoreDocs; dir.tweakBufferSizes(); Assert.AreEqual(1, hits.Length); hits = searcher.Search(new TermQuery(aaa), null, 1000, null).ScoreDocs; dir.tweakBufferSizes(); Assert.AreEqual(35, hits.Length); searcher.Close(); reader.Close(); } finally { _TestUtil.RmDir(indexDir); } }
/*** * Understands the Lucene query syntax */ public List <Utilities.Language.TextIndexing.IndexResult> GetDocumentsWithQuery(string query) { List <Utilities.Language.TextIndexing.IndexResult> fingerprints = new List <Utilities.Language.TextIndexing.IndexResult>(); HashSet <string> fingerprints_already_seen = new HashSet <string>(); try { using (Lucene.Net.Index.IndexReader index_reader = Lucene.Net.Index.IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true)) { using (Lucene.Net.Search.IndexSearcher index_searcher = new Lucene.Net.Search.IndexSearcher(index_reader)) { Lucene.Net.QueryParsers.QueryParser query_parser = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_29, "content", analyzer); Lucene.Net.Search.Query query_object = query_parser.Parse(query); Lucene.Net.Search.Hits hits = index_searcher.Search(query_object); var i = hits.Iterator(); while (i.MoveNext()) { Lucene.Net.Search.Hit hit = (Lucene.Net.Search.Hit)i.Current; string fingerprint = hit.Get("fingerprint"); string page = hit.Get("page"); if (!fingerprints_already_seen.Contains(fingerprint)) { fingerprints_already_seen.Add(fingerprint); IndexResult index_result = new IndexResult { fingerprint = fingerprint, score = hit.GetScore() }; fingerprints.Add(index_result); } } // the using blocks dispose (and thereby close) the searcher and reader, so no explicit Close() calls are needed } } } catch (Exception ex) { Logging.Warn(ex, "GetDocumentsWithQuery: There was a problem opening the index file for searching."); } return(fingerprints); }
private static void removeAllDuplicateAndDeletedFiles(IndexableFileInfo[] fileInfos, string LuceneIndexDir, IndexCreationMode indexCreationMode) { if (indexCreationMode != IndexCreationMode.AppendToExistingIndex) { return; } IndexReader reader = IndexReader.Open(LuceneIndexDir); try { int numDocs = reader.NumDocs(); for (int i = 0; i < numDocs; i++) { Document docToCheck = reader.Document(i); bool removeDocFromIndex = true; string filenameField = docToCheck.GetField("filename").StringValue(); string lastModified = (docToCheck.GetField("LastModified").StringValue()); foreach (IndexableFileInfo fi in fileInfos) { if (String.Compare(fi.Filename, filenameField, true) == 0 && DateTools.DateToString(fi.LastModified, DateTools.Resolution.SECOND) == lastModified) { removeDocFromIndex = false; break; } } // foreach if (removeDocFromIndex) { reader.DeleteDocument(i); if (!reader.HasDeletions()) { throw new Exception("error: deletion failed!!"); } } } // for each lucene doc } finally { reader.Close(); } LuceneIndexer indexer = new LuceneIndexer(LuceneIndexDir, indexCreationMode); // open up the index again indexer.CloseIndexWriter(OptimizeMode.DoOptimization); // just to optimize the index (which removes deleted items). }
public virtual void TestBinaryFieldInIndex() { IFieldable binaryFldStored = new Field("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValStored), Field.Store.YES); IFieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); // binary fields with store off are not allowed Assert.Throws <ArgumentException>( () => new Field("fail", System.Text.Encoding.UTF8.GetBytes(binaryValStored), Field.Store.NO)); Document doc = new Document(); doc.Add(binaryFldStored); doc.Add(stringFldStored); /* test for field count */ Assert.AreEqual(2, doc.fields_ForNUnit.Count); /* add the doc to a ram index */ MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null); writer.AddDocument(doc, null); writer.Close(); /* open a reader and fetch the document */ IndexReader reader = IndexReader.Open((Directory)dir, false, null); Document docFromReader = reader.Document(0, null); Assert.IsTrue(docFromReader != null); /* fetch the binary stored field and compare its content with the original one */ System.String binaryFldStoredTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryStored", null))); Assert.IsTrue(binaryFldStoredTest.Equals(binaryValStored)); /* fetch the string field and compare its content with the original one */ System.String stringFldStoredTest = docFromReader.Get("stringStored", null); Assert.IsTrue(stringFldStoredTest.Equals(binaryValStored)); /* delete the document from index */ reader.DeleteDocument(0, null); Assert.AreEqual(0, reader.NumDocs()); reader.Close(); dir.Close(); }
public void TestFieldNonExistent() { try { indexReader = IndexReader.Open(store, true); ld = new LuceneDictionary(indexReader, "nonexistent_field"); it = ld.GetWordsIterator(); AssertFalse("More elements than expected", it.HasNext()); AssertTrue("Nonexistent element is really null", it.Next() == null); } finally { if (indexReader != null) { indexReader.Close(); } } }
protected override void Dispose(bool disposing) { if (isDisposed) { return; } if (disposing) { if (closeReader) { reader.Close(); } } isDisposed = true; }
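// Hedged note on the override above: since Dispose(bool) is an override, the base class is assumed to provide the public entry point of the standard .NET dispose pattern, along these lines (not shown in the snippet):
public void Dispose() { Dispose(true); System.GC.SuppressFinalize(this); }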
public virtual void TestCaching() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null); Document doc = new Document(); TokenStream stream = new AnonymousClassTokenStream(this); stream = new CachingTokenFilter(stream); doc.Add(new Field("preanalyzed", stream, TermVector.NO)); // 1) we consume all tokens twice before we add the doc to the index checkTokens(stream); stream.Reset(); checkTokens(stream); // 2) now add the document to the index and verify if all tokens are indexed // don't reset the stream here, the DocumentWriter should do that implicitly writer.AddDocument(doc, null); writer.Close(); IndexReader reader = IndexReader.Open(dir, true, null); TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"), null); Assert.IsTrue(termPositions.Next(null)); Assert.AreEqual(1, termPositions.Freq); Assert.AreEqual(0, termPositions.NextPosition(null)); termPositions.Seek(new Term("preanalyzed", "term2"), null); Assert.IsTrue(termPositions.Next(null)); Assert.AreEqual(2, termPositions.Freq); Assert.AreEqual(1, termPositions.NextPosition(null)); Assert.AreEqual(3, termPositions.NextPosition(null)); termPositions.Seek(new Term("preanalyzed", "term3"), null); Assert.IsTrue(termPositions.Next(null)); Assert.AreEqual(1, termPositions.Freq); Assert.AreEqual(2, termPositions.NextPosition(null)); reader.Close(); // 3) reset stream and consume tokens again stream.Reset(); checkTokens(stream); }
public virtual void TestMissingTerms() { System.String fieldName = "field1"; MockRAMDirectory rd = new MockRAMDirectory(); IndexWriter w = new IndexWriter(rd, new KeywordAnalyzer(), MaxFieldLength.UNLIMITED, null); for (int i = 0; i < 100; i++) { Document doc = new Document(); int term = i * 10; //terms are units of 10; doc.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.NOT_ANALYZED)); w.AddDocument(doc, null); } w.Close(); IndexReader reader = IndexReader.Open((Directory)rd, true, null); IndexSearcher searcher = new IndexSearcher(reader); int numDocs = reader.NumDocs(); ScoreDoc[] results; MatchAllDocsQuery q = new MatchAllDocsQuery(); System.Collections.ArrayList terms = new System.Collections.ArrayList(); terms.Add("5"); results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[])terms.ToArray(typeof(System.String))), numDocs, null).ScoreDocs; Assert.AreEqual(0, results.Length, "Must match nothing"); terms = new System.Collections.ArrayList(); terms.Add("10"); results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[])terms.ToArray(typeof(System.String))), numDocs, null).ScoreDocs; Assert.AreEqual(1, results.Length, "Must match 1"); terms = new System.Collections.ArrayList(); terms.Add("10"); terms.Add("20"); results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[])terms.ToArray(typeof(System.String))), numDocs, null).ScoreDocs; Assert.AreEqual(2, results.Length, "Must match 2"); reader.Close(); rd.Close(); }
public virtual void TestFilterWorks() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null); for (int i = 0; i < 500; i++) { Document document = new Document(); document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(document, null); } writer.Close(); IndexReader reader = IndexReader.Open(dir, true, null); SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim())); SpanQueryFilter filter = new SpanQueryFilter(query); SpanFilterResult result = filter.BitSpans(reader, null); DocIdSet docIdSet = result.DocIdSet; Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be"); AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10); var spans = result.Positions; Assert.IsTrue(spans != null, "spans is null and it shouldn't be"); int size = GetDocIdSetSize(docIdSet); Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size); for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext();) { SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo)iterator.Current; Assert.IsTrue(info != null, "info is null and it shouldn't be"); //The doc should indicate the bit is on AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc); //There should be two positions in each Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2); } reader.Close(); }
public virtual void TestSetNorm_Renamed() { RAMDirectory store = new RAMDirectory(); IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null); // add the same document four times IFieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED); Document d1 = new Document(); d1.Add(f1); writer.AddDocument(d1, null); writer.AddDocument(d1, null); writer.AddDocument(d1, null); writer.AddDocument(d1, null); writer.Close(); // reset the boost of each instance of this document IndexReader reader = IndexReader.Open((Directory)store, false, null); reader.SetNorm(0, "field", 1.0f, null); reader.SetNorm(1, "field", 2.0f, null); reader.SetNorm(2, "field", 4.0f, null); reader.SetNorm(3, "field", 16.0f, null); reader.Close(); // check that searches are ordered by this boost float[] scores = new float[4]; new IndexSearcher(store, true, null).Search(new TermQuery(new Term("field", "word")), new AnonymousClassCollector(scores, this), null); float lastScore = 0.0f; for (int i = 0; i < 4; i++) { Assert.IsTrue(scores[i] > lastScore); lastScore = scores[i]; } }
public virtual void TestKeepLastNDeletionPolicyWithCreates() { int N = 10; for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N); Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); writer.Close(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for (int i = 0; i < N + 1; i++) { writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); for (int j = 0; j < 17; j++) { AddDoc(writer); } // this is a commit when autoCommit=false: writer.Close(); IndexReader reader = IndexReader.Open(dir, policy); reader.DeleteDocument(3); reader.SetNorm(5, "content", 2.0F); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs; Assert.AreEqual(16, hits.Length); // this is a commit when autoCommit=false: reader.Close(); searcher.Close(); writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); // This will not commit: there are no changes // pending because we opened for "create": writer.Close(); } Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit); if (!autoCommit) { Assert.AreEqual(3 * (N + 1), policy.numOnCommit); } IndexSearcher searcher2 = new IndexSearcher(dir); ScoreDoc[] hits2 = searcher2.Search(query, null, 1000).scoreDocs; Assert.AreEqual(0, hits2.Length); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); int expectedCount = 0; for (int i = 0; i < N + 1; i++) { try { IndexReader reader = IndexReader.Open(dir); // Work backwards in commits on what the expected // count should be. Only check this in the // autoCommit false case: if (!autoCommit) { searcher2 = new IndexSearcher(reader); hits2 = searcher2.Search(query, null, 1000).scoreDocs; Assert.AreEqual(expectedCount, hits2.Length); searcher2.Close(); if (expectedCount == 0) { expectedCount = 16; } else if (expectedCount == 16) { expectedCount = 17; } else if (expectedCount == 17) { expectedCount = 0; } } reader.Close(); if (i == N) { Assert.Fail("should have failed on commits before last " + N); } } catch (System.IO.IOException e) { if (i != N) { throw e; } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Close(); } }
public virtual void TestKeepLastNDeletionPolicy() { int N = 5; for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; Directory dir = new RAMDirectory(); KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N); for (int j = 0; j < N + 1; j++) { IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); for (int i = 0; i < 17; i++) { AddDoc(writer); } writer.Optimize(); writer.Close(); } Assert.IsTrue(policy.numDelete > 0); Assert.AreEqual(N + 1, policy.numOnInit); if (autoCommit) { Assert.IsTrue(policy.numOnCommit > 1); } else { Assert.AreEqual(N + 1, policy.numOnCommit); } // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = IndexReader.Open(dir); reader.Close(); if (i == N) { Assert.Fail("should have failed on commits prior to last " + N); } } catch (System.IO.IOException e) { if (i != N) { throw e; } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Close(); } }
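// Hedged sketch of the KeepLastNDeletionPolicy exercised by the two tests above (the class itself is not shown; the IndexDeletionPolicy member shapes, the counter fields, and the IndexCommit.Delete() call are assumptions inferred from the assertions): keep the newest N commits and delete the rest.
class KeepLastNDeletionPolicySketch : IndexDeletionPolicy { private readonly int N; public int numOnInit, numOnCommit, numDelete; public KeepLastNDeletionPolicySketch(int n) { N = n; } public void OnInit(System.Collections.IList commits) { numOnInit++; DeleteOld(commits); } public void OnCommit(System.Collections.IList commits) { numOnCommit++; DeleteOld(commits); } private void DeleteOld(System.Collections.IList commits) { for (int i = 0; i < commits.Count - N; i++) { ((IndexCommit)commits[i]).Delete(); numDelete++; } } }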
public virtual void TestOpenPriorSnapshot() { // Never deletes a commit KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this); Directory dir = new MockRAMDirectory(); policy.dir = dir; IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(2); for (int i = 0; i < 10; i++) { AddDoc(writer); if ((1 + i) % 2 == 0) { writer.Commit(); } } writer.Close(); System.Collections.ICollection commits = IndexReader.ListCommits(dir); Assert.AreEqual(6, commits.Count); IndexCommit lastCommit = null; System.Collections.IEnumerator it = commits.GetEnumerator(); while (it.MoveNext()) { IndexCommit commit = (IndexCommit)it.Current; if (lastCommit == null || commit.GetGeneration() > lastCommit.GetGeneration()) { lastCommit = commit; } } Assert.IsTrue(lastCommit != null); // Now add 1 doc and optimize writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED); AddDoc(writer); Assert.AreEqual(11, writer.NumDocs()); writer.Optimize(); writer.Close(); Assert.AreEqual(7, IndexReader.ListCommits(dir).Count); // Now open writer on the commit just before optimize: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit); Assert.AreEqual(10, writer.NumDocs()); // Should undo our rollback: writer.Rollback(); IndexReader r = IndexReader.Open(dir); // Still optimized, still 11 docs Assert.IsTrue(r.IsOptimized()); Assert.AreEqual(11, r.NumDocs()); r.Close(); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit); Assert.AreEqual(10, writer.NumDocs()); // Commits the rollback: writer.Close(); // Now 8 because we made another commit Assert.AreEqual(8, IndexReader.ListCommits(dir).Count); r = IndexReader.Open(dir); // Not optimized because we rolled it back, and now only // 10 docs Assert.IsTrue(!r.IsOptimized()); Assert.AreEqual(10, r.NumDocs()); r.Close(); // Reoptimize writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED); writer.Optimize(); writer.Close(); r = IndexReader.Open(dir); Assert.IsTrue(r.IsOptimized()); Assert.AreEqual(10, r.NumDocs()); r.Close(); // Now open writer on the commit just before optimize, // but this time keeping only the last commit: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), new KeepOnlyLastCommitDeletionPolicy(), IndexWriter.MaxFieldLength.LIMITED, lastCommit); Assert.AreEqual(10, writer.NumDocs()); // Reader still sees optimized index, because writer // opened on the prior commit has not yet committed: r = IndexReader.Open(dir); Assert.IsTrue(r.IsOptimized()); Assert.AreEqual(10, r.NumDocs()); r.Close(); writer.Close(); // Now reader sees unoptimized index: r = IndexReader.Open(dir); Assert.IsTrue(!r.IsOptimized()); Assert.AreEqual(10, r.NumDocs()); r.Close(); dir.Close(); }
public virtual void TestDeletedDocs() { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null); writer.SetMaxBufferedDocs(2); Document doc = new Document(); doc.Add(new Field("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); for (int i = 0; i < 19; i++) { writer.AddDocument(doc, null); } writer.Close(); IndexReader reader = IndexReader.Open((Directory)dir, false, null); reader.DeleteDocument(5, null); reader.Close(); System.IO.MemoryStream bos = new System.IO.MemoryStream(1024); CheckIndex checker = new CheckIndex(dir); checker.SetInfoStream(new System.IO.StreamWriter(bos)); //checker.setInfoStream(System.out); CheckIndex.Status indexStatus = checker.CheckIndex_Renamed_Method(null); if (indexStatus.clean == false) { System.Console.Out.WriteLine("CheckIndex failed"); using (var sr = new StreamReader(bos)) { System.Console.Out.WriteLine(sr.ReadToEnd()); Assert.Fail(); } } CheckIndex.Status.SegmentInfoStatus seg = (CheckIndex.Status.SegmentInfoStatus)indexStatus.segmentInfos[0]; Assert.IsTrue(seg.openReaderPassed); Assert.IsNotNull(seg.diagnostics); Assert.IsNotNull(seg.fieldNormStatus); Assert.IsNull(seg.fieldNormStatus.error); Assert.AreEqual(1, seg.fieldNormStatus.totFields); Assert.IsNotNull(seg.termIndexStatus); Assert.IsNull(seg.termIndexStatus.error); Assert.AreEqual(1, seg.termIndexStatus.termCount); Assert.AreEqual(19, seg.termIndexStatus.totFreq); Assert.AreEqual(18, seg.termIndexStatus.totPos); Assert.IsNotNull(seg.storedFieldStatus); Assert.IsNull(seg.storedFieldStatus.error); Assert.AreEqual(18, seg.storedFieldStatus.docCount); Assert.AreEqual(18, seg.storedFieldStatus.totFields); Assert.IsNotNull(seg.termVectorStatus); Assert.IsNull(seg.termVectorStatus.error); Assert.AreEqual(18, seg.termVectorStatus.docCount); Assert.AreEqual(18, seg.termVectorStatus.totVectors); Assert.IsTrue(seg.diagnostics.Count > 0); List <string> onlySegments = new List <string>(); onlySegments.Add("_0"); Assert.IsTrue(checker.CheckIndex_Renamed_Method(onlySegments, null).clean == true); }
public void TestSpellchecker() { SpellChecker.Net.Search.Spell.SpellChecker sc = new SpellChecker.Net.Search.Spell.SpellChecker(new RAMDirectory()); indexReader = IndexReader.Open(store); sc.IndexDictionary(new LuceneDictionary(indexReader, "contents")); String[] suggestions = sc.SuggestSimilar("Tam", 1); AssertEquals(1, suggestions.Length); AssertEquals("Tom", suggestions[0]); suggestions = sc.SuggestSimilar("Jarry", 1); AssertEquals(1, suggestions.Length); AssertEquals("Jerry", suggestions[0]); indexReader.Close(); }
public void TestFieldContents_1() { try { indexReader = IndexReader.Open(store); ld = new LuceneDictionary(indexReader, "contents"); it = ld.GetWordsIterator(); AssertTrue("First element doesn't exist.", it.HasNext()); AssertTrue("First element isn't correct", it.Next().Equals("Jerry")); AssertTrue("Second element doesn't exist.", it.HasNext()); AssertTrue("Second element isn't correct", it.Next().Equals("Tom")); AssertFalse("More elements than expected", it.HasNext()); AssertTrue("Nonexistent element is really null", it.Next() == null); ld = new LuceneDictionary(indexReader, "contents"); it = ld.GetWordsIterator(); int counter = 2; while (it.HasNext()) { it.Next(); counter--; } AssertTrue("Number of words incorrect", counter == 0); } finally { if (indexReader != null) { indexReader.Close(); } } }
/* Open pre-lockless index, add docs, do a delete & * setNorm, and search */ public virtual void ChangeIndexWithAdds(System.String dirName) { System.String origDirName = dirName; dirName = FullDir(dirName); Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName)); // open writer IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED); // add 10 docs for (int i = 0; i < 10; i++) { AddDoc(writer, 35 + i); } // make sure writer sees right total -- writer seems not to know about deletes in .del? int expected; if (Compare(origDirName, "24") < 0) { expected = 45; } else { expected = 46; } Assert.AreEqual(expected, writer.MaxDoc(), "wrong doc count"); writer.Close(); // make sure searching sees right # hits IndexSearcher searcher = new IndexSearcher(dir, true); ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Document d = searcher.Doc(hits[0].Doc); Assert.AreEqual("21", d.Get("id"), "wrong first document"); TestHits(hits, 44, searcher.IndexReader); searcher.Close(); // make sure we can do delete & setNorm against this // pre-lockless segment: IndexReader reader = IndexReader.Open(dir, false); Term searchTerm = new Term("id", "6"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "wrong delete count"); reader.SetNorm(22, "content", (float)2.0); reader.Close(); // make sure they "took": searcher = new IndexSearcher(dir, true); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Assert.AreEqual(43, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].Doc); Assert.AreEqual("22", d.Get("id"), "wrong first document"); TestHits(hits, 43, searcher.IndexReader); searcher.Close(); // optimize writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED); writer.Optimize(); writer.Close(); searcher = new IndexSearcher(dir, true); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Assert.AreEqual(43, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].Doc); TestHits(hits, 43, searcher.IndexReader); Assert.AreEqual("22", d.Get("id"), "wrong first document"); searcher.Close(); dir.Close(); }
protected internal override void DoClose() { in_Renamed.Close(); }
private void CloseIndexReader(IndexReader myReader) { // Dispose() closes the reader; a preceding Close() call would dispose it twice myReader.Dispose(); }
private void CloseInternalReader(bool trace, IndexReader subReader, bool finalClose) { ReaderData readerData; // TODO: can we avoid the lock? lock (semaphoreIndexReaderLock) { readerData = searchIndexReaderSemaphores[subReader]; } if (readerData == null) { log.Error("Trying to close a Lucene IndexReader not present: " + subReader.Directory()); // TODO: Should we try to close? return; } // Acquire the locks in the same order as everywhere else object directoryProviderLock = perDirectoryProviderManipulationLocks[readerData.Provider]; bool closeReader = false; lock (directoryProviderLock) { IndexReader reader; bool isActive = activeSearchIndexReaders.TryGetValue(readerData.Provider, out reader) && reader == subReader; if (trace) log.Info("IndexReader not active: " + subReader); lock (semaphoreIndexReaderLock) { readerData = searchIndexReaderSemaphores[subReader]; if (readerData == null) { log.Error("Trying to close a Lucene IndexReader not present: " + subReader.Directory()); // TODO: Should we try to close? return; } //final close, the semaphore should be at 0 already if (!finalClose) { readerData.Semaphore--; if (trace) log.Info("Semaphore decreased to: " + readerData.Semaphore + " for " + subReader); } if (readerData.Semaphore < 0) log.Error("Semaphore negative: " + subReader.Directory()); if (!isActive && readerData.Semaphore == 0) { searchIndexReaderSemaphores.Remove(subReader); closeReader = true; } else closeReader = false; } } if (closeReader) { if (trace) log.Info("Closing IndexReader: " + subReader); try { subReader.Close(); } catch (IOException e) { log.Warn("Unable to close Lucene IndexReader", e); } } }
public virtual void TestDeleteLeftoverFiles() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(10); int i; for (i = 0; i < 35; i++) { AddDoc(writer, i); } writer.SetUseCompoundFile(false); for (; i < 45; i++) { AddDoc(writer, i); } writer.Close(); // Delete one doc so we get a .del file: IndexReader reader = IndexReader.Open(dir); Term searchTerm = new Term("id", "7"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "didn't delete the right number of documents"); // Set one norm so we get a .s0 file: reader.SetNorm(21, "content", (float)1.5); reader.Close(); // Now, artificially create an extra .del file & extra // .s0 file: System.String[] files = dir.ListAll(); /* * for(int j=0;j<files.length;j++) { * System.out.println(j + ": " + files[j]); * } */ // The numbering of fields can vary depending on which // JRE is in use. On some JREs we see content bound to // field 0; on others, field 1. So, here we have to // figure out which field number corresponds to // "content", and then set our expected file names below // accordingly: CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs"); FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm"); int contentFieldIndex = -1; for (i = 0; i < fieldInfos.Size(); i++) { FieldInfo fi = fieldInfos.FieldInfo(i); if (fi.name_ForNUnit.Equals("content")) { contentFieldIndex = i; break; } } cfsReader.Close(); Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _2.cfs segment"); System.String normSuffix = "s" + contentFieldIndex; // Create a bogus separate norms file for a // segment/field that actually has a separate norms file // already: CopyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix); // Create a bogus separate norms file for a // segment/field that actually has a separate norms file // already, using the "not compound file" extension: CopyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex); // Create a bogus separate norms file for a // segment/field that does not have a separate norms // file already: CopyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix); // Create a bogus separate norms file for a // segment/field that does not have a separate norms // file already using the "not compound file" extension: CopyFile(dir, "_2_1." + normSuffix, "_1_1.f" + contentFieldIndex); // Create a bogus separate del file for a // segment that already has a separate del file: CopyFile(dir, "_0_1.del", "_0_2.del"); // Create a bogus separate del file for a // segment that does not yet have a separate del file: CopyFile(dir, "_0_1.del", "_1_1.del"); // Create a bogus separate del file for a // non-existent segment: CopyFile(dir, "_0_1.del", "_188_1.del"); // Create a bogus segment file: CopyFile(dir, "_0.cfs", "_188.cfs"); // Create a bogus fnm file when the CFS already exists: CopyFile(dir, "_0.cfs", "_0.fnm"); // Create a deletable file: CopyFile(dir, "_0.cfs", "deletable"); // Create some old segments file: CopyFile(dir, "segments_3", "segments"); CopyFile(dir, "segments_3", "segments_2"); // Create a bogus cfs file shadowing a non-cfs segment: CopyFile(dir, "_2.cfs", "_3.cfs"); System.String[] filesPre = dir.ListAll(); // Open & close a writer: it should delete the above bogus files and nothing more: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED); writer.Close(); System.String[] files2 = dir.ListAll(); dir.Close(); System.Array.Sort(files); System.Array.Sort(files2); System.Collections.Hashtable dif = DifFiles(files, files2); if (!SupportClass.CollectionsHelper.Equals(files, files2)) { Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n " + AsString(files) + "\n actual files:\n " + AsString(files2) + "\ndif: " + SupportClass.CollectionsHelper.CollectionToString(dif)); } }
static public void ReleaseReader (IndexReader reader) { lock (reader_rav_map) { ReaderAndVersion rav = (ReaderAndVersion) reader_rav_map [reader]; if (rav != null) UnrefReaderAndVersion_Unlocked (rav); else reader.Close (); } }
// Special function to permanently remove this reader // Called when StaticQueryables need to unload static internal void CloseReader (IndexReader reader) { lock (reader_rav_map) { ReaderAndVersion rav = (ReaderAndVersion) reader_rav_map [reader]; if (rav != null) { rav.Refcount --; // Remove one extra refcount that was added to keep the reader opened UnrefReaderAndVersion_Unlocked (rav); } else reader.Close (); } }
public virtual void TestExactFileNames() { System.String outputDir = "lucene.backwardscompat0.index"; RmDir(outputDir); try { Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(FullDir(outputDir))); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); writer.SetRAMBufferSizeMB(16.0); for (int i = 0; i < 35; i++) { AddDoc(writer, i); } Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count"); writer.Close(); // Delete one doc so we get a .del file: IndexReader reader = IndexReader.Open(dir, false); Term searchTerm = new Term("id", "7"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "didn't delete the right number of documents"); // Set one norm so we get a .s0 file: reader.SetNorm(21, "content", (float)1.5); reader.Close(); // The numbering of fields can vary depending on which // JRE is in use. On some JREs we see content bound to // field 0; on others, field 1. So, here we have to // figure out which field number corresponds to // "content", and then set our expected file names below // accordingly: CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs"); FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm"); int contentFieldIndex = -1; for (int i = 0; i < fieldInfos.Size(); i++) { FieldInfo fi = fieldInfos.FieldInfo(i); if (fi.name_ForNUnit.Equals("content")) { contentFieldIndex = i; break; } } cfsReader.Close(); Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _0.cfs segment"); // Now verify file names: System.String[] expected; expected = new System.String[] { "_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen" }; System.String[] actual = dir.ListAll(); System.Array.Sort(expected); System.Array.Sort(actual); if (!CollectionsHelper.Equals(expected, actual)) { Assert.Fail("incorrect filenames in index: expected:\n " + AsString(expected) + "\n actual:\n " + AsString(actual)); } dir.Close(); } finally { RmDir(outputDir); } }
public void TestFieldContents_2() { try { indexReader = IndexReader.Open(store); ld = new LuceneDictionary(indexReader, "contents"); it = ld.GetWordsIterator(); // hasNext() should have no side effects //{{DIGY}} But it has. Need a fix? //AssertTrue("First element isn't where it should be.", it.HasNext()); //AssertTrue("First element isn't where it should be.", it.HasNext()); //AssertTrue("First element isn't where it should be.", it.HasNext()); // just iterate through words AssertTrue("First element isn't correct", it.Next().Equals("Jerry")); AssertTrue("Second element isn't correct", it.Next().Equals("Tom")); AssertTrue("Nonexistent element is really null", it.Next() == null); // hasNext() should still have no side effects ... AssertFalse("There should not be any more elements", it.HasNext()); AssertFalse("There should not be any more elements", it.HasNext()); AssertFalse("There should not be any more elements", it.HasNext()); // .. and there are really no more words AssertTrue("Nonexistent element is really null", it.Next() == null); AssertTrue("Nonexistent element is really null", it.Next() == null); AssertTrue("Nonexistent element is really null", it.Next() == null); } finally { if (indexReader != null) { indexReader.Close(); } } }
public virtual void TestExpirationTimeDeletionPolicy() { double SECONDS = 2.0; bool autoCommit = false; bool useCompoundFile = true; Directory dir = new RAMDirectory(); ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(this, dir, SECONDS); IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetUseCompoundFile(useCompoundFile); writer.Close(); long lastDeleteTime = 0; for (int i = 0; i < 7; i++) { // Record last time when writer performed deletes of // past commits lastDeleteTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetUseCompoundFile(useCompoundFile); for (int j = 0; j < 17; j++) { AddDoc(writer); } writer.Close(); // Make sure to sleep long enough so that some commit // points will be deleted: System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * (int)(1000.0 * (SECONDS / 5.0)))); } // First, make sure the policy in fact deleted something: Assert.IsTrue(policy.numDelete > 0, "no commits were deleted"); // Then simplistic check: just verify that the // segments_N's that still exist are in fact within SECONDS // seconds of the last one's mod time, and, that I can // open a reader on each: long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); while (gen > 0) { try { IndexReader reader = IndexReader.Open(dir); reader.Close(); fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); long modTime = dir.FileModified(fileName); Assert.IsTrue(lastDeleteTime - modTime <= (SECONDS * 1000), "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted"); } catch (System.IO.IOException e) { // OK break; } dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; } dir.Close(); }
public void TestFieldAaa() { try { indexReader = IndexReader.Open(store); ld = new LuceneDictionary(indexReader, "aaa"); it = ld.GetWordsIterator(); AssertTrue("First element doesn't exist.", it.HasNext()); AssertTrue("First element isn't correct", it.Next().Equals("foo")); AssertFalse("More elements than expected", it.HasNext()); AssertTrue("Nonexistent element is really null", it.Next() == null); } finally { if (indexReader != null) { indexReader.Close(); } } }
public virtual void TestKeepAllDeletionPolicy() { for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; // Never deletes a commit KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this); Directory dir = new RAMDirectory(); policy.dir = dir; IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); writer.SetMergeScheduler(new SerialMergeScheduler()); for (int i = 0; i < 107; i++) { AddDoc(writer); if (autoCommit && i % 10 == 0) { writer.Commit(); } } writer.Close(); writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetUseCompoundFile(useCompoundFile); writer.Optimize(); writer.Close(); Assert.AreEqual(2, policy.numOnInit); if (!autoCommit) { // If we are not auto committing then there should // be exactly 2 commits (one per close above): Assert.AreEqual(2, policy.numOnCommit); } // Test listCommits System.Collections.ICollection commits = IndexReader.ListCommits(dir); if (!autoCommit) { // 1 from opening writer + 2 from closing writer Assert.AreEqual(3, commits.Count); } // 1 from opening writer + 2 from closing writer + // 11 from calling writer.commit() explicitly above else { Assert.AreEqual(14, commits.Count); } System.Collections.IEnumerator it = commits.GetEnumerator(); // Make sure we can open a reader on each commit: while (it.MoveNext()) { IndexCommit commit = (IndexCommit)it.Current; IndexReader r = IndexReader.Open(commit, null); r.Close(); } // Simplistic check: just verify all segments_N's still // exist, and, I can open a reader on each: dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); while (gen > 0) { IndexReader reader = IndexReader.Open(dir); reader.Close(); dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; if (gen > 0) { // Now that we've removed a commit point, which // should have orphan'd at least one index file. // Open & close a writer and assert that it // actually removed something: int preCount = dir.ListAll().Length; writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.LIMITED); writer.Close(); int postCount = dir.ListAll().Length; Assert.IsTrue(postCount < preCount); } } dir.Close(); } }
public virtual void TestMapper() { TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos); Assert.IsTrue(reader != null); SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); reader.Get(0, mapper); var set_Renamed = mapper.TermVectorEntrySet; Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be"); //three fields, 4 terms, all terms are the same Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4); //Check offsets and positions for (System.Collections.IEnumerator iterator = set_Renamed.GetEnumerator(); iterator.MoveNext();) { TermVectorEntry tve = (TermVectorEntry)iterator.Current; Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be"); } mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); reader.Get(1, mapper); set_Renamed = mapper.TermVectorEntrySet; Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be"); //three fields, 4 terms, all terms are the same Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4); //Should have offsets and positions b/c we are munging all the fields together for (System.Collections.IEnumerator iterator = set_Renamed.GetEnumerator(); iterator.MoveNext();) { TermVectorEntry tve = (TermVectorEntry)iterator.Current; Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be"); } FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); reader.Get(0, fsMapper); var map = fsMapper.FieldToTerms; Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length); for (var iterator = map.GetEnumerator(); iterator.MoveNext();) { var entry = iterator.Current; var sortedSet = entry.Value; Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4); for (var inner = sortedSet.GetEnumerator(); inner.MoveNext();) { TermVectorEntry tve = inner.Current; Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); //Check offsets and positions. System.String field = tve.Field; if (field.Equals(testFields[0])) { //should have offsets Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be"); } else if (field.Equals(testFields[1])) { //should not have offsets Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be"); } } } //Try mapper that ignores offs and positions fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator()); reader.Get(0, fsMapper); map = fsMapper.FieldToTerms; Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length); for (var iterator = map.GetEnumerator(); iterator.MoveNext();) { var entry = iterator.Current; var sortedSet = entry.Value; Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4); for (var inner = sortedSet.GetEnumerator(); inner.MoveNext();) { TermVectorEntry tve = inner.Current; Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); //Check offsets and positions. System.String field = tve.Field; if (field.Equals(testFields[0])) { //offsets and positions are ignored by this mapper, so they should be null Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() should be null when offsets are ignored"); Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() should be null when positions are ignored"); } else if (field.Equals(testFields[1])) { //should not have offsets Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be"); } } } // test setDocumentNumber() IndexReader ir = IndexReader.Open(dir, true); DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper(); Assert.AreEqual(-1, docNumAwareMapper.GetDocumentNumber()); ir.GetTermFreqVector(0, docNumAwareMapper); Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber()); docNumAwareMapper.SetDocumentNumber(-1); ir.GetTermFreqVector(1, docNumAwareMapper); Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber()); docNumAwareMapper.SetDocumentNumber(-1); ir.GetTermFreqVector(0, "f1", docNumAwareMapper); Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber()); docNumAwareMapper.SetDocumentNumber(-1); ir.GetTermFreqVector(1, "f2", docNumAwareMapper); Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber()); docNumAwareMapper.SetDocumentNumber(-1); ir.GetTermFreqVector(0, "f1", docNumAwareMapper); Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber()); ir.Close(); }
public virtual void TestUpdateDocument() { bool optimize = true; Directory dir1 = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); // create the index CreateIndexNoClose(!optimize, "index1", writer); // writer.flush(false, true, true); // get a reader IndexReader r1 = writer.GetReader(); Assert.IsTrue(r1.IsCurrent()); System.String id10 = r1.Document(10).GetField("id").StringValue(); Document newDoc = r1.Document(10); newDoc.RemoveField("id"); newDoc.Add(new Field("id", System.Convert.ToString(8000), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.UpdateDocument(new Term("id", id10), newDoc); Assert.IsFalse(r1.IsCurrent()); IndexReader r2 = writer.GetReader(); Assert.IsTrue(r2.IsCurrent()); Assert.AreEqual(0, Count(new Term("id", id10), r2)); Assert.AreEqual(1, Count(new Term("id", System.Convert.ToString(8000)), r2)); r1.Close(); writer.Close(); Assert.IsTrue(r2.IsCurrent()); IndexReader r3 = IndexReader.Open(dir1); Assert.IsTrue(r3.IsCurrent()); Assert.IsTrue(r2.IsCurrent()); Assert.AreEqual(0, Count(new Term("id", id10), r3)); Assert.AreEqual(1, Count(new Term("id", System.Convert.ToString(8000)), r3)); writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(doc); Assert.IsTrue(r2.IsCurrent()); Assert.IsTrue(r3.IsCurrent()); writer.Close(); Assert.IsFalse(r2.IsCurrent()); Assert.IsTrue(!r3.IsCurrent()); r2.Close(); r3.Close(); dir1.Close(); }
private IndexReader ReplaceActiveReader(IndexReader outOfDateReader, object directoryProviderLock, IDirectoryProvider directoryProvider, IndexReader[] readers) { bool trace = log.IsInfoEnabled; IndexReader oldReader; bool closeOldReader = false; bool closeOutOfDateReader = false; IndexReader reader; /* Readers are opened outside of lock protection, so several may be created in parallel; that is no worse than NotShared and keeps the locking time short, hence scalability */ try { reader = IndexReader.Open(directoryProvider.Directory); } catch (IOException e) { throw new SearchException("Unable to open Lucene IndexReader", e); } lock (directoryProviderLock) { // Since not protected by a lock, other ones can have been added; capture the previous active reader before replacing it activeSearchIndexReaders.TryGetValue(directoryProvider, out oldReader); activeSearchIndexReaders[directoryProvider] = reader; lock (semaphoreIndexReaderLock) { searchIndexReaderSemaphores[reader] = new ReaderData(1, directoryProvider); if (trace) log.Info("Semaphore: 1 for " + reader); if (outOfDateReader != null) { ReaderData readerData; searchIndexReaderSemaphores.TryGetValue(outOfDateReader, out readerData); if (readerData == null) { closeOutOfDateReader = false; //already removed by a previous thread } else if (readerData.Semaphore == 0) { searchIndexReaderSemaphores.Remove(outOfDateReader); closeOutOfDateReader = true; } else { closeOutOfDateReader = false; } } if (oldReader != null && oldReader != outOfDateReader) { ReaderData readerData = searchIndexReaderSemaphores[oldReader]; if (readerData == null) { log.Warn("Semaphore should not be null"); closeOldReader = true; //TODO should be true or false? } else if (readerData.Semaphore == 0) { searchIndexReaderSemaphores.Remove(oldReader); closeOldReader = true; } else { closeOldReader = false; } } } } if (closeOutOfDateReader) { if (trace) log.Info("Closing out of date IndexReader " + outOfDateReader); try { outOfDateReader.Close(); } catch (IOException e) { ReaderProviderHelper.Clean(readers); throw new SearchException("Unable to close Lucene IndexReader", e); } } if (closeOldReader) { if (trace) log.Info("Closing old IndexReader " + oldReader); try { oldReader.Close(); } catch (IOException e) { ReaderProviderHelper.Clean(readers); throw new SearchException("Unable to close Lucene IndexReader", e); } } return reader; }
public virtual void TestDuringAddIndexes() { Directory dir1 = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer.SetInfoStream(infoStream); writer.SetMergeFactor(2); // create the index CreateIndexNoClose(false, "test", writer); writer.Commit(); Directory[] dirs = new Directory[10]; for (int i = 0; i < 10; i++) { dirs[i] = new MockRAMDirectory(dir1); } IndexReader r = writer.GetReader(); int NUM_THREAD = 5; float SECONDS = 3; long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS); System.Collections.IList excs = (System.Collections.IList)System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(new System.Collections.ArrayList())); SupportClass.ThreadClass[] threads = new SupportClass.ThreadClass[NUM_THREAD]; for (int i = 0; i < NUM_THREAD; i++) { threads[i] = new AnonymousClassThread(endTime, writer, dirs, excs, this); threads[i].IsBackground = true; threads[i].Start(); } int lastCount = 0; while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime) { IndexReader r2 = r.Reopen(); if (r2 != r) { r.Close(); r = r2; } Query q = new TermQuery(new Term("indexname", "test")); int count = new IndexSearcher(r).Search(q, 10).totalHits; Assert.IsTrue(count >= lastCount); lastCount = count; } for (int i = 0; i < NUM_THREAD; i++) { threads[i].Join(); } Assert.AreEqual(0, excs.Count); writer.Close(); _TestUtil.CheckIndex(dir1); r.Close(); dir1.Close(); }
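// Hedged distillation of the refresh loop in the test above: Reopen() returns the same instance when nothing changed, so the old reader is closed only when a new one actually came back.
private static IndexReader Refresh(IndexReader r) { IndexReader r2 = r.Reopen(); if (r2 != r) { r.Close(); r = r2; } return r; }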