private static RAMDirectory MakeEmptyIndex(int numDeletedDocs) { RAMDirectory d = new RAMDirectory(); IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED, null); for (int i = 0; i < numDeletedDocs; i++) { w.AddDocument(new Document(), null); } w.Commit(null); w.DeleteDocuments(null, new MatchAllDocsQuery()); w.Commit(null); if (0 < numDeletedDocs) { Assert.IsTrue(w.HasDeletions(null), "writer has no deletions"); } Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs"); Assert.AreEqual(0, w.NumDocs(null), "writer has non-deleted docs"); w.Close(); IndexReader r = IndexReader.Open((Directory)d, true, null); Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs"); r.Close(); return(d); }
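The helper above leans on the relationship between MaxDoc, NumDocs and NumDeletedDocs: MaxDoc counts every document slot still present in the segments, NumDocs counts only live documents, and the deleted count is the difference. A minimal sketch of that relationship, written against the older 2.9/3.0-style Lucene.Net API that several snippets here use (exact member shapes, method vs. property, vary between versions; the "id" field and the CountExample name are illustrative):

using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;

static void CountExample()
{
    var dir = new RAMDirectory();
    var writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 3; i++)
    {
        var doc = new Document();
        doc.Add(new Field("id", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    writer.DeleteDocuments(new Term("id", "0"));
    writer.Commit();

    // MaxDoc still reports 3 until the deletion is merged away; NumDocs reports 2 live docs.
    System.Console.WriteLine("MaxDoc={0} NumDocs={1}", writer.MaxDoc(), writer.NumDocs());
    writer.Dispose();
}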
public virtual void TestIndexWriterDirtSimple() { Directory dir = new RAMDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); TieredMergePolicy tmp = NewTieredMergePolicy(); iwc.SetMergePolicy(tmp); iwc.SetMaxBufferedDocs(2); tmp.MaxMergeAtOnce = 100; tmp.SegmentsPerTier = 100; tmp.ForceMergeDeletesPctAllowed = 30.0; IndexWriter w = new IndexWriter(dir, iwc); int numDocs = 2; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(NewTextField("content", "aaa " + i, Field.Store.NO)); w.AddDocument(doc); } Assert.AreEqual(numDocs, w.MaxDoc); Assert.AreEqual(numDocs, w.NumDocs()); }
public virtual void RunTest(Random random, Directory directory) { IndexWriter writer = new IndexWriter(directory, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, ANALYZER).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2)).SetMergePolicy(NewLogMergePolicy())); for (int iter = 0; iter < NUM_ITER; iter++) { int iterFinal = iter; ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor = 1000; FieldType customType = new FieldType(StringField.TYPE_STORED); customType.OmitNorms = true; for (int i = 0; i < 200; i++) { Document d = new Document(); d.Add(NewField("id", Convert.ToString(i), customType)); d.Add(NewField("contents", English.IntToEnglish(i), customType)); writer.AddDocument(d); } ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor = 4; ThreadClass[] threads = new ThreadClass[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { int iFinal = i; IndexWriter writerFinal = writer; threads[i] = new ThreadAnonymousInnerClassHelper(this, iterFinal, customType, iFinal, writerFinal); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Start(); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Join(); } Assert.IsTrue(!Failed); int expectedDocCount = (int)((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS))); Assert.AreEqual(expectedDocCount, writer.NumDocs(), "index=" + writer.SegString() + " numDocs=" + writer.NumDocs() + " maxDoc=" + writer.MaxDoc + " config=" + writer.Config); Assert.AreEqual(expectedDocCount, writer.MaxDoc, "index=" + writer.SegString() + " numDocs=" + writer.NumDocs() + " maxDoc=" + writer.MaxDoc + " config=" + writer.Config); writer.Dispose(); writer = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, ANALYZER).SetOpenMode(OpenMode_e.APPEND).SetMaxBufferedDocs(2)); DirectoryReader reader = DirectoryReader.Open(directory); Assert.AreEqual(1, reader.Leaves.Count, "reader=" + reader); Assert.AreEqual(expectedDocCount, reader.NumDocs); reader.Dispose(); } writer.Dispose(); }
public virtual void TestForceMergeDeletes() { Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); TieredMergePolicy tmp = NewTieredMergePolicy(); conf.SetMergePolicy(tmp); conf.SetMaxBufferedDocs(4); tmp.MaxMergeAtOnce = 100; tmp.SegmentsPerTier = 100; tmp.ForceMergeDeletesPctAllowed = 30.0; IndexWriter w = new IndexWriter(dir, conf); for (int i = 0; i < 80; i++) { Document doc = new Document(); doc.Add(NewTextField("content", "aaa " + (i % 4), Field.Store.NO)); w.AddDocument(doc); } Assert.AreEqual(80, w.MaxDoc); Assert.AreEqual(80, w.NumDocs()); if (VERBOSE) { Console.WriteLine("\nTEST: delete docs"); } w.DeleteDocuments(new Term("content", "0")); w.ForceMergeDeletes(); Assert.AreEqual(80, w.MaxDoc); Assert.AreEqual(60, w.NumDocs()); if (VERBOSE) { Console.WriteLine("\nTEST: forceMergeDeletes2"); } ((TieredMergePolicy)w.Config.MergePolicy).ForceMergeDeletesPctAllowed = 10.0; w.ForceMergeDeletes(); Assert.AreEqual(60, w.NumDocs()); Assert.AreEqual(60, w.MaxDoc); w.Dispose(); dir.Dispose(); }
public void TestDeleteByTermIsCurrent()
{
    // get reader
    IndexReader reader = writer.GetReader();

    // assert index has a document and reader is up2date
    Assert.AreEqual(1, writer.NumDocs(), "One document should be in the index");
    Assert.IsTrue(reader.IsCurrent(), "Document added, reader should be current");

    // remove document
    Term idTerm = new Term("UUID", "1");
    writer.DeleteDocuments(idTerm);
    writer.Commit();

    // assert document has been deleted (index changed), reader is stale
    Assert.AreEqual(0, writer.NumDocs(), "Document should be removed");
    Assert.IsFalse(reader.IsCurrent(), "Reader should be stale");

    reader.Close();
}
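The same distinction drives the IsCurrent checks above: the writer's NumDocs() reflects its deletes immediately, while a reader opened earlier keeps its point-in-time view until it is reopened. A hedged sketch in the older API style, with illustrative field names:

var dir = new RAMDirectory();
var writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
var doc = new Document();
doc.Add(new Field("UUID", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
writer.AddDocument(doc);
writer.Commit();

IndexReader reader = IndexReader.Open(dir, true);   // point-in-time view: 1 doc

writer.DeleteDocuments(new Term("UUID", "1"));
writer.Commit();

// The writer sees the delete immediately; the already-open reader does not.
System.Console.WriteLine(writer.NumDocs());   // 0
System.Console.WriteLine(reader.NumDocs());   // still 1
System.Console.WriteLine(reader.IsCurrent()); // false - a commit happened after it was opened

IndexReader fresh = reader.Reopen();          // new reader reflecting the latest commit
System.Console.WriteLine(fresh.NumDocs());    // 0
fresh.Dispose();
reader.Dispose();
writer.Dispose();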
public virtual void ChangeIndexWithAdds(Random random, Directory dir, string origOldName)
{
    // open writer
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.APPEND).SetMergePolicy(NewLogMergePolicy()));

    // add 10 docs
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer, 35 + i);
    }

    // make sure writer sees right total -- writer seems not to know about deletes in .del?
    int expected;
    if (Compare(origOldName, "24") < 0)
    {
        expected = 44;
    }
    else
    {
        expected = 45;
    }
    Assert.AreEqual(expected, writer.NumDocs(), "wrong doc count");
    writer.Dispose();

    // make sure searching sees right # hits
    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);
    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Document d = searcher.IndexReader.Document(hits[0].Doc);
    assertEquals("wrong first document", "0", d.Get("id"));
    DoTestHits(hits, 44, searcher.IndexReader);
    reader.Dispose();

    // fully merge
    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.APPEND).SetMergePolicy(NewLogMergePolicy()));
    writer.ForceMerge(1);
    writer.Dispose();

    reader = DirectoryReader.Open(dir);
    searcher = NewSearcher(reader);
    hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Assert.AreEqual(44, hits.Length, "wrong number of hits");
    d = searcher.Doc(hits[0].Doc);
    DoTestHits(hits, 44, searcher.IndexReader);
    assertEquals("wrong first document", "0", d.Get("id"));
    reader.Dispose();
}
static void Main(string[] args)
{
    // default AzureDirectory stores cache in local temp folder
    var azureDirectory = new AzureDirectory(CloudStorageAccount.Parse(ConfigurationManager.AppSettings["blobStorage"]), "TestCatalog6");
    var indexExists = IndexReader.IndexExists(azureDirectory);

    IndexWriter indexWriter = null;
    while (indexWriter == null)
    {
        try
        {
            indexWriter = new IndexWriter(azureDirectory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), !IndexReader.IndexExists(azureDirectory), new Lucene.Net.Index.IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
        }
        catch (LockObtainFailedException)
        {
            Console.WriteLine("Lock is taken, Hit 'Y' to clear the lock, or anything else to try again");
            if (Console.ReadLine().ToLower().Trim() == "y")
                azureDirectory.ClearLock("write.lock");
        }
    }
    Console.WriteLine("IndexWriter lock obtained, this process has exclusive write access to index");
    indexWriter.SetRAMBufferSizeMB(10.0);
    //indexWriter.SetUseCompoundFile(false);
    //indexWriter.SetMaxMergeDocs(10000);
    //indexWriter.SetMergeFactor(100);

    for (int iDoc = 0; iDoc < 10000; iDoc++)
    {
        if (iDoc % 10 == 0)
            Console.WriteLine(iDoc);

        var doc = new Document();
        doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
        doc.Add(new Field("Title", GeneratePhrase(10), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
        doc.Add(new Field("Body", GeneratePhrase(40), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
        indexWriter.AddDocument(doc);
    }
    Console.WriteLine("Total docs is {0}", indexWriter.NumDocs());
    indexWriter.Dispose();

    IndexSearcher searcher;
    using (new AutoStopWatch("Creating searcher"))
    {
        searcher = new IndexSearcher(azureDirectory);
    }
    SearchForPhrase(searcher, "dog");
    SearchForPhrase(searcher, _random.Next(32768).ToString());
    SearchForPhrase(searcher, _random.Next(32768).ToString());
    Console.Read();
}
protected override Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken) { PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30)); analyzer.AddAnalyzer("Id", new IdentifierKeywordAnalyzer()); int i = 0; using (IndexWriter writer = new IndexWriter(_directory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED)) { foreach (JObject item in items) { i++; string id = item["nuget:id"].ToString(); string version = item["nuget:version"].ToString(); BooleanQuery query = new BooleanQuery(); query.Add(new BooleanClause(new TermQuery(new Term("Id", id.ToLowerInvariant())), Occur.MUST)); query.Add(new BooleanClause(new TermQuery(new Term("Version", version)), Occur.MUST)); writer.DeleteDocuments(query); Document doc = new Document(); doc.Add(new Field("Id", item["nuget:id"].ToString(), Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Version", item["nuget:version"].ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } string trace = Guid.NewGuid().ToString(); writer.Commit(new Dictionary<string, string> { { "commitTimeStamp", commitTimeStamp.ToString("O") }, { "trace", trace } }); Trace.TraceInformation("COMMIT {0} documents, index contains {1} documents, commitTimeStamp {2}, trace: {3}", i, writer.NumDocs(), commitTimeStamp.ToString("O"), trace); } return Task.FromResult(true); }
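The commit above attaches commitTimeStamp and trace as commit user data. A small sketch of reading that metadata back, patterned on the IndexReader.ListCommits/GetUserData calls used in other snippets here; it assumes the same _directory field as the method above and the 3.0-style API:

// Read back the user data stored with each commit (e.g. the commitTimeStamp written above).
foreach (IndexCommit commit in IndexReader.ListCommits(_directory))
{
    IDictionary<string, string> userData = commit.GetUserData();
    string timeStamp;
    if (userData != null && userData.TryGetValue("commitTimeStamp", out timeStamp))
    {
        Console.WriteLine("commit stamped at {0}", timeStamp);
    }
}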
public virtual void TestIndexWriter_LUCENE4656()
{
    Store.Directory directory = NewDirectory();
    IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));

    TokenStream ts = new EmptyTokenStream();
    assertFalse(ts.HasAttribute<ITermToBytesRefAttribute>());
    Document doc = new Document();
    doc.Add(new StringField("id", "0", Field.Store.YES));
    doc.Add(new TextField("description", ts));

    // this should not fail because we have no TermToBytesRefAttribute
    writer.AddDocument(doc);

    assertEquals(1, writer.NumDocs());
    writer.Dispose();
    directory.Dispose();
}
public void TestReadAndWrite()
{
    var connectionString = Environment.GetEnvironmentVariable("DataConnectionString") ?? "UseDevelopmentStorage=true";
    var cloudStorageAccount = CloudStorageAccount.Parse(connectionString);

    // default AzureDirectory stores cache in local temp folder
    var azureDirectory = new AzureDirectory(cloudStorageAccount, "testcatalog");

    using (var indexWriter = new IndexWriter(azureDirectory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), !IndexReader.IndexExists(azureDirectory), new Lucene.Net.Index.IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH)))
    {
        indexWriter.SetRAMBufferSizeMB(10.0);

        for (int iDoc = 0; iDoc < 10000; iDoc++)
        {
            var doc = new Document();
            doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString() + "-" + iDoc.ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
            doc.Add(new Field("Title", GeneratePhrase(10), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
            doc.Add(new Field("Body", GeneratePhrase(40), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
            indexWriter.AddDocument(doc);
        }

        Console.WriteLine("Total docs is {0}", indexWriter.NumDocs());
    }

    using (var searcher = new IndexSearcher(azureDirectory))
    {
        Assert.AreNotEqual(0, SearchForPhrase(searcher, "dog"));
        Assert.AreNotEqual(0, SearchForPhrase(searcher, "cat"));
        Assert.AreNotEqual(0, SearchForPhrase(searcher, "car"));
    }

    // check the container exists, and delete it
    var blobClient = cloudStorageAccount.CreateCloudBlobClient();
    var container = blobClient.GetContainerReference("testcatalog");
    Assert.IsTrue(container.Exists()); // check the container exists
    container.Delete();
}
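The writer/searcher code above is directory-agnostic, so the same round trip can be exercised without a storage account by swapping AzureDirectory for a local directory. A sketch under that assumption (RAMDirectory here; FSDirectory.Open would work the same way); note the doc count is read before the using block disposes the writer:

var localDirectory = new RAMDirectory();
using (var indexWriter = new IndexWriter(localDirectory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), true, new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH)))
{
    var doc = new Document();
    doc.Add(new Field("Body", "the quick brown fox", Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.AddDocument(doc);
    Console.WriteLine("Total docs is {0}", indexWriter.NumDocs()); // read the count before the writer is disposed
}
using (var searcher = new IndexSearcher(localDirectory))
{
    var hits = searcher.Search(new TermQuery(new Term("Body", "fox")), 10);
    Console.WriteLine("Hits: {0}", hits.TotalHits);
}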
public virtual void TestFutureCommit()
{
    Directory dir = NewDirectory();

    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));
    Document doc = new Document();
    w.AddDocument(doc);

    // commit to "first"
    IDictionary<string, string> commitData = new Dictionary<string, string>();
    commitData["tag"] = "first";
    w.CommitData = commitData;
    w.Commit();

    // commit to "second"
    w.AddDocument(doc);
    commitData["tag"] = "second";
    w.CommitData = commitData;
    w.Dispose();

    // open "first" with IndexWriter
    IndexCommit commit = null;
    foreach (IndexCommit c in DirectoryReader.ListCommits(dir))
    {
        if (c.UserData["tag"].Equals("first"))
        {
            commit = c;
            break;
        }
    }
    Assert.IsNotNull(commit);

    w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE).SetIndexCommit(commit));
    Assert.AreEqual(1, w.NumDocs());

    // commit IndexWriter to "third"
    w.AddDocument(doc);
    commitData["tag"] = "third";
    w.CommitData = commitData;
    w.Dispose();

    // make sure "second" commit is still there
    commit = null;
    foreach (IndexCommit c in DirectoryReader.ListCommits(dir))
    {
        if (c.UserData["tag"].Equals("second"))
        {
            commit = c;
            break;
        }
    }
    Assert.IsNotNull(commit);

    dir.Dispose();
}
public virtual void TestDocCount()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = null;
    IndexReader reader = null;
    int i;

    IndexWriter.SetDefaultWriteLockTimeout(2000);
    Assert.AreEqual(2000, IndexWriter.GetDefaultWriteLockTimeout());

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

    IndexWriter.SetDefaultWriteLockTimeout(1000);

    // add 100 documents
    for (i = 0; i < 100; i++)
    {
        AddDoc(writer);
    }
    Assert.AreEqual(100, writer.DocCount());
    writer.Close();

    // delete 40 documents
    reader = IndexReader.Open(dir);
    for (i = 0; i < 40; i++)
    {
        reader.DeleteDocument(i);
    }
    reader.Close();

    // test doc count before segments are merged/index is optimized
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    Assert.AreEqual(100, writer.DocCount());
    writer.Close();

    reader = IndexReader.Open(dir);
    Assert.AreEqual(100, reader.MaxDoc());
    Assert.AreEqual(60, reader.NumDocs());
    reader.Close();

    // optimize the index and check that the new doc count is correct
    writer = new IndexWriter(dir, true, new WhitespaceAnalyzer());
    Assert.AreEqual(100, writer.MaxDoc());
    Assert.AreEqual(60, writer.NumDocs());
    writer.Optimize();
    Assert.AreEqual(60, writer.MaxDoc());
    Assert.AreEqual(60, writer.NumDocs());
    writer.Close();

    // check that the index reader gives the same numbers.
    reader = IndexReader.Open(dir);
    Assert.AreEqual(60, reader.MaxDoc());
    Assert.AreEqual(60, reader.NumDocs());
    reader.Close();

    // make sure opening a new index for create over
    // this existing one works correctly:
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Assert.AreEqual(0, writer.MaxDoc());
    Assert.AreEqual(0, writer.NumDocs());
    writer.Close();
}
public void TestFutureCommit() { Directory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), new NoDeletionPolicy(), IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); w.AddDocument(doc); // commit to "first" System.Collections.Generic.Dictionary<string, string> commitData = new System.Collections.Generic.Dictionary<string, string>(); commitData["tag"]="first"; w.Commit(commitData); // commit to "second" w.AddDocument(doc); commitData["tag"]="second"; w.Commit(commitData); w.Close(); // open "first" with IndexWriter IndexCommit commit = null; System.Collections.IEnumerator it = IndexReader.ListCommits(dir).GetEnumerator(); while (it.MoveNext()) { IndexCommit c = (IndexCommit)it.Current; string tag = (String)c.GetUserData()["tag"]; if ("first".Equals(tag)) { commit = c; break; } } Assert.NotNull(commit); w = new IndexWriter(dir, new WhitespaceAnalyzer(), new NoDeletionPolicy(), IndexWriter.MaxFieldLength.UNLIMITED, commit); Assert.AreEqual(1, w.NumDocs()); // commit IndexWriter to "third" w.AddDocument(doc); commitData["tag"]="third"; w.Commit(commitData); w.Close(); // make sure "second" commit is still there commit = null; it = IndexReader.ListCommits(dir).GetEnumerator(); while (it.MoveNext()) { IndexCommit c = (IndexCommit)it.Current; string tag = (String)c.GetUserData()["tag"]; if ("second".Equals(tag)) { commit = c; break; } } Assert.NotNull(commit); IndexReader r = IndexReader.Open(commit, true); Assert.AreEqual(2, r.NumDocs()); r.Close(); // open "second", w/ writeable IndexReader & commit r = IndexReader.Open(commit, new NoDeletionPolicy(), false); Assert.AreEqual(2, r.NumDocs()); r.DeleteDocument(0); r.DeleteDocument(1); commitData["tag"]="fourth"; r.Commit(commitData); r.Close(); // make sure "third" commit is still there commit = null; it = IndexReader.ListCommits(dir).GetEnumerator(); while (it.MoveNext()) { IndexCommit c = (IndexCommit)it.Current; string tag = (String)c.GetUserData()["tag"]; if ("third".Equals(tag)) { commit = c; break; } } Assert.NotNull(commit); dir.Close(); }
public virtual void TestRandom() { int numThreads = 1 + Random().Next(8); int numDocumentsToIndex = 50 + AtLeast(70); AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); iwc.SetFlushPolicy(flushPolicy); int numDWPT = 1 + Random().Next(8); DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT); iwc.SetIndexerThreadPool(threadPool); IndexWriter writer = new IndexWriter(dir, iwc); flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy; DocumentsWriter docsWriter = writer.DocsWriter; Assert.IsNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.FlushControl; Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init"); IndexThread[] threads = new IndexThread[numThreads]; for (int x = 0; x < threads.Length; x++) { threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, true); threads[x].Start(); } for (int x = 0; x < threads.Length; x++) { threads[x].Join(); } Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due"); Assert.AreEqual(numDocumentsToIndex, writer.NumDocs()); Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc); if (flushPolicy.FlushOnRAM() && !flushPolicy.FlushOnDocCount() && !flushPolicy.FlushOnDeleteTerms()) { long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0); Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark"); if (flushPolicy.HasMarkedPending) { assertTrue("max: " + maxRAMBytes + " " + flushControl.PeakActiveBytes, maxRAMBytes <= flushControl.PeakActiveBytes); } } AssertActiveBytesAfter(flushControl); writer.Commit(); Assert.AreEqual(0, flushControl.ActiveBytes()); IndexReader r = DirectoryReader.Open(dir); Assert.AreEqual(numDocumentsToIndex, r.NumDocs); Assert.AreEqual(numDocumentsToIndex, r.MaxDoc); if (!flushPolicy.FlushOnRAM()) { assertFalse("never stall if we don't flush on RAM", docsWriter.FlushControl.StallControl.WasStalled()); assertFalse("never block if we don't flush on RAM", docsWriter.FlushControl.StallControl.HasBlocked()); } r.Dispose(); writer.Dispose(); dir.Dispose(); }
//[HttpPost]
//public ActionResult GetAutoComplete(Models.Search model)
//{
//    var result = PerformAutoCompleteLookup(model.Phrase, this.IndexDirectory);
//    var facetMap = result.FacetMap;
//    var titleFacets = facetMap["title"];
//    var facetVals = titleFacets.GetFacets();
//    foreach (var facet in facetVals)
//    {
//        model.Results.Add(facet.ToString());
//    }
//    model.Phrase = string.Empty;
//    model.SelectionGroups.Clear();
//    model.FacetGroups.Clear();
//    return Json(model, "application/json");
//}

private void IndexProducts(string indexPath)
{
    DateTime startIndexing = DateTime.Now;
    Console.WriteLine("start indexing at: " + startIndexing);

    // read in the books xml
    var productsXml = new XmlDocument();
    string productDataPath = Server.MapPath("~/ProductData/Products.xml");
    productsXml.Load(productDataPath);

    // create the indexer with a standard analyzer
    //var indexWriter = new IndexWriter(indexPath, new StandardAnalyzer(), true);
    var directory = FSDirectory.Open(new System.IO.DirectoryInfo(indexPath));
    bool recreateIndex = true;
    //Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

    // get analyzer
    ISynonymEngine engine = new SpecialSynonymEngine();
    Analyzer analyzer = new SynonymAnalyzer(Lucene.Net.Util.Version.LUCENE_29, engine);

    var indexWriter = new IndexWriter(directory, analyzer, recreateIndex, IndexWriter.MaxFieldLength.UNLIMITED);
    int numIndexed = 0;
    try
    {
        // loop through all the books in the books.xml
        foreach (XPathNavigator product in productsXml.CreateNavigator().Select("//product"))
        {
            // create a Lucene document for this book
            var doc = new Document();

            // add the ID as stored but not indexed field, not used to query on
            //doc.Add(new Field("id", product.GetAttribute("id", string.Empty), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));

            // add the title and description as stored and tokenized fields, the analyzer processes the content
            doc.Add(new Field("title", product.SelectSingleNode("title").Value, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
            //doc.Add(new Field("Title2", product.SelectSingleNode("title").Value, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
            //doc.Add(new Field("description", product.SelectSingleNode("description").Value, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));

            // add the title and genre as stored and un tokenized fields, the value is stored as is
            doc.Add(new Field("Material", product.SelectSingleNode("properties//material").Value, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
            doc.Add(new Field("Style", product.SelectSingleNode("properties//style").Value, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
            doc.Add(new Field("Mounting", product.SelectSingleNode("properties//mounting").Value, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
            doc.Add(new Field("Brand", product.SelectSingleNode("properties//brand").Value, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));

            // add the publication date as stored and un tokenized field, note the special date handling
            //DateTime publicationDate = DateTime.Parse(product.SelectSingleNode("publish_date").Value, CultureInfo.InvariantCulture);
            //doc.Add(new Field("publicationDate", DateField.DateToString(publicationDate), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));

            // add the document to the index
            indexWriter.AddDocument(doc);
        }

        // make lucene fast
        indexWriter.Optimize();

        // capture the count before the writer is disposed; NumDocs() cannot be called on a disposed writer
        numIndexed = indexWriter.NumDocs();
    }
    finally
    {
        // close the index writer
        indexWriter.Dispose();
    }

    DateTime endIndexing = DateTime.Now;
    Console.WriteLine("end indexing at: " + endIndexing);
    Console.WriteLine("Duration: " + (endIndexing - startIndexing).Seconds + " seconds");
    Console.WriteLine("Number of indexed document: " + numIndexed);
}
// Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>()); public virtual void RunTest(string testName) { Failed.Set(false); AddCount.Set(0); DelCount.Set(0); PackCount.Set(0); DateTime t0 = DateTime.UtcNow; Random random = new Random(Random().Next()); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues()); DirectoryInfo tempDir = CreateTempDir(testName); Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW if (Dir is BaseDirectoryWrapper) { ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves. } MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream()); if (LuceneTestCase.TEST_NIGHTLY) { // newIWConfig makes smallish max seg size, which // results in tons and tons of segments for this test // when run nightly: MergePolicy mp = conf.MergePolicy; if (mp is TieredMergePolicy) { ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0; } else if (mp is LogByteSizeMergePolicy) { ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0; } else if (mp is LogMergePolicy) { ((LogMergePolicy)mp).MaxMergeDocs = 100000; } } conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this)); if (VERBOSE) { conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out); } Writer = new IndexWriter(Dir, conf); TestUtil.ReduceOpenFiles(Writer); TaskScheduler es = Random().NextBoolean() ? null : TaskScheduler.Default; DoAfterWriter(es); int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4); int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 
300 : RANDOM_MULTIPLIER; ISet<string> delIDs = new ConcurrentHashSet<string>(new HashSet<string>()); ISet<string> delPackIDs = new ConcurrentHashSet<string>(new HashSet<string>()); IList<SubDocs> allSubDocs = new SynchronizedCollection<SubDocs>(); DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC); ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs); if (VERBOSE) { Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]"); } // Let index build up a bit Thread.Sleep(100); DoSearching(es, stopTime); if (VERBOSE) { Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]"); } for (int thread = 0; thread < indexThreads.Length; thread++) { indexThreads[thread].Join(); } if (VERBOSE) { Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount); } IndexSearcher s = FinalSearcher; if (VERBOSE) { Console.WriteLine("TEST: finalSearcher=" + s); } Assert.IsFalse(Failed.Get()); bool doFail = false; // Verify: make sure delIDs are in fact deleted: foreach (string id in delIDs) { TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc); doFail = true; } } // Verify: make sure delPackIDs are in fact deleted: foreach (string id in delPackIDs) { TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches"); doFail = true; } } // Verify: make sure each group of sub-docs are still in docID order: foreach (SubDocs subDocs in allSubDocs.ToList()) { TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20); if (!subDocs.Deleted) { // We sort by relevance but the scores should be identical so sort falls back to by docID: if (hits.TotalHits != subDocs.SubIDs.Count) { Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits); doFail = true; } else { int lastDocID = -1; int startDocID = -1; foreach (ScoreDoc scoreDoc in hits.ScoreDocs) { int docID = scoreDoc.Doc; if (lastDocID != -1) { Assert.AreEqual(1 + lastDocID, docID); } else { startDocID = docID; } lastDocID = docID; Document doc = s.Doc(docID); Assert.AreEqual(subDocs.PackID, doc.Get("packID")); } lastDocID = startDocID - 1; foreach (string subID in subDocs.SubIDs) { hits = s.Search(new TermQuery(new Term("docid", subID)), 1); Assert.AreEqual(1, hits.TotalHits); int docID = hits.ScoreDocs[0].Doc; if (lastDocID != -1) { Assert.AreEqual(1 + lastDocID, docID); } lastDocID = docID; } } } else { // Pack was deleted -- make sure its docs are // deleted. 
We can't verify packID is deleted // because we can re-use packID for update: foreach (string subID in subDocs.SubIDs) { Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits); } } } // Verify: make sure all not-deleted docs are in fact // not deleted: int endID = Convert.ToInt32(docs.NextDoc().Get("docid")); docs.Dispose(); for (int id = 0; id < endID; id++) { string stringID = "" + id; if (!delIDs.Contains(stringID)) { TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1); if (hits.TotalHits != 1) { Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + string.Join(",", delIDs.ToArray())); doFail = true; } } } Assert.IsFalse(doFail); Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount); ReleaseSearcher(s); Writer.Commit(); Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount); DoClose(); Writer.Dispose(false); // Cannot shutdown until after writer is closed because // writer has merged segment warmer that uses IS to run // searches, and that IS may be using this es! /*if (es != null) { es.shutdown(); es.awaitTermination(1, TimeUnit.SECONDS); }*/ TestUtil.CheckIndex(Dir); Dir.Dispose(); System.IO.Directory.Delete(tempDir.FullName, true); if (VERBOSE) { Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]"); } }
public virtual void TestOpenPriorSnapshot()
{
    Directory dir = NewDirectory();

    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(new KeepAllDeletionPolicy(this, dir)).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(10)));
    KeepAllDeletionPolicy policy = (KeepAllDeletionPolicy)writer.Config.DelPolicy;
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer);
        if ((1 + i) % 2 == 0)
        {
            writer.Commit();
        }
    }
    writer.Dispose();

    ICollection<IndexCommit> commits = DirectoryReader.ListCommits(dir);
    Assert.AreEqual(5, commits.Count);
    IndexCommit lastCommit = null;
    foreach (IndexCommit commit in commits)
    {
        if (lastCommit == null || commit.Generation > lastCommit.Generation)
        {
            lastCommit = commit;
        }
    }
    Assert.IsTrue(lastCommit != null);

    // Now add 1 doc and merge
    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy));
    AddDoc(writer);
    Assert.AreEqual(11, writer.NumDocs());
    writer.ForceMerge(1);
    writer.Dispose();

    Assert.AreEqual(6, DirectoryReader.ListCommits(dir).Count);

    // Now open writer on the commit just before merge:
    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit));
    Assert.AreEqual(10, writer.NumDocs());

    // Should undo our rollback:
    writer.Rollback();

    DirectoryReader r = DirectoryReader.Open(dir);
    // Still merged, still 11 docs
    Assert.AreEqual(1, r.Leaves.Count);
    Assert.AreEqual(11, r.NumDocs);
    r.Dispose();

    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit));
    Assert.AreEqual(10, writer.NumDocs());

    // Commits the rollback:
    writer.Dispose();

    // Now 7 because we made another commit
    Assert.AreEqual(7, DirectoryReader.ListCommits(dir).Count);

    r = DirectoryReader.Open(dir);
    // Not fully merged because we rolled it back, and now only
    // 10 docs
    Assert.IsTrue(r.Leaves.Count > 1);
    Assert.AreEqual(10, r.NumDocs);
    r.Dispose();

    // Re-merge
    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy));
    writer.ForceMerge(1);
    writer.Dispose();

    r = DirectoryReader.Open(dir);
    Assert.AreEqual(1, r.Leaves.Count);
    Assert.AreEqual(10, r.NumDocs);
    r.Dispose();

    // Now open writer on the commit just before merging,
    // but this time keeping only the last commit:
    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexCommit(lastCommit));
    Assert.AreEqual(10, writer.NumDocs());

    // Reader still sees fully merged index, because writer
    // opened on the prior commit has not yet committed:
    r = DirectoryReader.Open(dir);
    Assert.AreEqual(1, r.Leaves.Count);
    Assert.AreEqual(10, r.NumDocs);
    r.Dispose();

    writer.Dispose();

    // Now reader sees not-fully-merged index:
    r = DirectoryReader.Open(dir);
    Assert.IsTrue(r.Leaves.Count > 1);
    Assert.AreEqual(10, r.NumDocs);
    r.Dispose();

    dir.Dispose();
}
protected internal virtual void RunFlushByRam(int numThreads, double maxRamMB, bool ensureNotStalled) { int numDocumentsToIndex = 10 + AtLeast(30); AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); Directory dir = NewDirectory(); MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetFlushPolicy(flushPolicy); int numDWPT = 1 + AtLeast(2); DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT); iwc.SetIndexerThreadPool(threadPool); iwc.SetRAMBufferSizeMB(maxRamMB); iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); IndexWriter writer = new IndexWriter(dir, iwc); flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy; Assert.IsFalse(flushPolicy.FlushOnDocCount()); Assert.IsFalse(flushPolicy.FlushOnDeleteTerms()); Assert.IsTrue(flushPolicy.FlushOnRAM()); DocumentsWriter docsWriter = writer.DocsWriter; Assert.IsNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.FlushControl; Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init"); IndexThread[] threads = new IndexThread[numThreads]; for (int x = 0; x < threads.Length; x++) { threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, false); threads[x].Start(); } for (int x = 0; x < threads.Length; x++) { threads[x].Join(); } long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0); Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due numThreads=" + numThreads); Assert.AreEqual(numDocumentsToIndex, writer.NumDocs()); Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc); Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark"); AssertActiveBytesAfter(flushControl); if (flushPolicy.HasMarkedPending) { Assert.IsTrue(maxRAMBytes < flushControl.PeakActiveBytes); } if (ensureNotStalled) { Assert.IsFalse(docsWriter.FlushControl.StallControl.WasStalled()); } writer.Dispose(); Assert.AreEqual(0, flushControl.ActiveBytes()); dir.Dispose(); }
public virtual void TestStallControl()
{
    int[] numThreads = new int[] { 4 + Random().Next(8), 1 };
    int numDocumentsToIndex = 50 + Random().Next(50);
    for (int i = 0; i < numThreads.Length; i++)
    {
        AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
        MockDirectoryWrapper dir = NewMockDirectory();
        // mock a very slow hard disk sometimes here so that flushing is very slow
        dir.Throttling = MockDirectoryWrapper.Throttling_e.SOMETIMES;
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
        iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy();
        iwc.SetFlushPolicy(flushPolicy);

        DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numThreads[i] == 1 ? 1 : 2);
        iwc.SetIndexerThreadPool(threadPool);
        // with such a small ram buffer we should be stalled quite quickly
        iwc.SetRAMBufferSizeMB(0.25);
        IndexWriter writer = new IndexWriter(dir, iwc);
        IndexThread[] threads = new IndexThread[numThreads[i]];
        for (int x = 0; x < threads.Length; x++)
        {
            threads[x] = new IndexThread(this, numDocs, numThreads[i], writer, LineDocFile, false);
            threads[x].Start();
        }

        for (int x = 0; x < threads.Length; x++)
        {
            threads[x].Join();
        }

        DocumentsWriter docsWriter = writer.DocsWriter;
        Assert.IsNotNull(docsWriter);
        DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;
        Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due");
        Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
        Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
        if (numThreads[i] == 1)
        {
            assertFalse("single thread must not block numThreads: " + numThreads[i], docsWriter.FlushControl.StallControl.HasBlocked());
        }
        if (docsWriter.FlushControl.PeakNetBytes > (2d * iwc.RAMBufferSizeMB * 1024d * 1024d))
        {
            Assert.IsTrue(docsWriter.FlushControl.StallControl.WasStalled());
        }
        AssertActiveBytesAfter(flushControl);
        writer.Dispose(true);
        dir.Dispose();
    }
}
public virtual void TestExistingDeletes()
{
    Directory[] dirs = new Directory[2];
    for (int i = 0; i < dirs.Length; i++)
    {
        dirs[i] = NewDirectory();
        IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
        IndexWriter writer = new IndexWriter(dirs[i], conf);
        Document doc = new Document();
        doc.Add(new StringField("id", "myid", Field.Store.NO));
        writer.AddDocument(doc);
        writer.Dispose();
    }

    IndexWriterConfig conf_ = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    IndexWriter writer_ = new IndexWriter(dirs[0], conf_);

    // Now delete the document
    writer_.DeleteDocuments(new Term("id", "myid"));

    IndexReader r = DirectoryReader.Open(dirs[1]);
    try
    {
        writer_.AddIndexes(r);
    }
    finally
    {
        r.Dispose();
    }

    writer_.Commit();
    Assert.AreEqual(1, writer_.NumDocs(), "Documents from the incoming index should not have been deleted");
    writer_.Dispose();

    foreach (Directory dir in dirs)
    {
        dir.Dispose();
    }
}
public virtual void TestFlushExceptions() { MockDirectoryWrapper directory = NewMockDirectory(); FailOnlyOnFlush failure = new FailOnlyOnFlush(this); directory.FailOn(failure); IndexWriter writer = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2)); Document doc = new Document(); Field idField = NewStringField("id", "", Field.Store.YES); doc.Add(idField); int extraCount = 0; for (int i = 0; i < 10; i++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + i); } for (int j = 0; j < 20; j++) { idField.StringValue = Convert.ToString(i * 20 + j); writer.AddDocument(doc); } // must cycle here because sometimes the merge flushes // the doc we just added and so there's nothing to // flush, and we don't hit the exception while (true) { writer.AddDocument(doc); failure.SetDoFail(); try { writer.Flush(true, true); if (failure.HitExc) { Assert.Fail("failed to hit IOException"); } extraCount++; } catch (IOException ioe) { if (VERBOSE) { Console.WriteLine(ioe.StackTrace); } failure.ClearDoFail(); break; } } Assert.AreEqual(20 * (i + 1) + extraCount, writer.NumDocs()); } writer.Dispose(); IndexReader reader = DirectoryReader.Open(directory); Assert.AreEqual(200 + extraCount, reader.NumDocs); reader.Dispose(); directory.Dispose(); }
public virtual void TestDeleteNullQuery() { Directory dir = NewDirectory(); IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false))); for (int i = 0; i < 5; i++) { AddDoc(modifier, i, 2 * i); } modifier.DeleteDocuments(new TermQuery(new Term("nada", "nada"))); modifier.Commit(); Assert.AreEqual(5, modifier.NumDocs()); modifier.Dispose(); dir.Dispose(); }
public virtual void TestRollingUpdates_Mem() { Random random = new Random(Random().Next()); BaseDirectoryWrapper dir = NewDirectory(); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues()); //provider.register(new MemoryCodec()); // LUCENE TODO: uncomment this out once MemoryPostingsFormat is brought over //if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean()) //{ // Codec.Default = // TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.NextFloat())); //} MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); int SIZE = AtLeast(20); int id = 0; IndexReader r = null; IndexSearcher s = null; int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble()))); if (VERBOSE) { Console.WriteLine("TEST: numUpdates=" + numUpdates); } int updateCount = 0; // TODO: sometimes update ids not in order... for (int docIter = 0; docIter < numUpdates; docIter++) { Documents.Document doc = docs.NextDoc(); string myID = "" + id; if (id == SIZE - 1) { id = 0; } else { id++; } if (VERBOSE) { Console.WriteLine(" docIter=" + docIter + " id=" + id); } ((Field)doc.GetField("docid")).StringValue = myID; Term idTerm = new Term("docid", myID); bool doUpdate; if (s != null && updateCount < SIZE) { TopDocs hits = s.Search(new TermQuery(idTerm), 1); Assert.AreEqual(1, hits.TotalHits); doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc); if (VERBOSE) { if (doUpdate) { Console.WriteLine(" tryDeleteDocument failed"); } else { Console.WriteLine(" tryDeleteDocument succeeded"); } } } else { doUpdate = true; if (VERBOSE) { Console.WriteLine(" no searcher: doUpdate=true"); } } updateCount++; if (doUpdate) { w.UpdateDocument(idTerm, doc); } else { w.AddDocument(doc); } if (docIter >= SIZE && Random().Next(50) == 17) { if (r != null) { r.Dispose(); } bool applyDeletions = Random().NextBoolean(); if (VERBOSE) { Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions); } r = w.GetReader(applyDeletions); if (applyDeletions) { s = NewSearcher(r); } else { s = null; } Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE); updateCount = 0; } } if (r != null) { r.Dispose(); } w.Commit(); Assert.AreEqual(SIZE, w.NumDocs()); w.Dispose(); TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates"); docs.Dispose(); // LUCENE-4455: SegmentInfos infos = new SegmentInfos(); infos.Read(dir); long totalBytes = 0; foreach (SegmentCommitInfo sipc in infos.Segments) { totalBytes += sipc.SizeInBytes(); } long totalBytes2 = 0; foreach (string fileName in dir.ListAll()) { if (!fileName.StartsWith(IndexFileNames.SEGMENTS)) { totalBytes2 += dir.FileLength(fileName); } } Assert.AreEqual(totalBytes2, totalBytes); dir.Dispose(); }
public virtual void TestOpenPriorSnapshot() { // Never deletes a commit KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this); Directory dir = new MockRAMDirectory(); policy.dir = dir; IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(2); for (int i = 0; i < 10; i++) { AddDoc(writer); if ((1 + i) % 2 == 0) writer.Commit(); } writer.Close(); System.Collections.ICollection commits = IndexReader.ListCommits(dir); Assert.AreEqual(6, commits.Count); IndexCommit lastCommit = null; System.Collections.IEnumerator it = commits.GetEnumerator(); while (it.MoveNext()) { IndexCommit commit = (IndexCommit) it.Current; if (lastCommit == null || commit.GetGeneration() > lastCommit.GetGeneration()) lastCommit = commit; } Assert.IsTrue(lastCommit != null); // Now add 1 doc and optimize writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED); AddDoc(writer); Assert.AreEqual(11, writer.NumDocs()); writer.Optimize(); writer.Close(); Assert.AreEqual(7, IndexReader.ListCommits(dir).Count); // Now open writer on the commit just before optimize: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit); Assert.AreEqual(10, writer.NumDocs()); // Should undo our rollback: writer.Rollback(); IndexReader r = IndexReader.Open(dir); // Still optimized, still 11 docs Assert.IsTrue(r.IsOptimized()); Assert.AreEqual(11, r.NumDocs()); r.Close(); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit); Assert.AreEqual(10, writer.NumDocs()); // Commits the rollback: writer.Close(); // Now 8 because we made another commit Assert.AreEqual(8, IndexReader.ListCommits(dir).Count); r = IndexReader.Open(dir); // Not optimized because we rolled it back, and now only // 10 docs Assert.IsTrue(!r.IsOptimized()); Assert.AreEqual(10, r.NumDocs()); r.Close(); // Reoptimize writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED); writer.Optimize(); writer.Close(); r = IndexReader.Open(dir); Assert.IsTrue(r.IsOptimized()); Assert.AreEqual(10, r.NumDocs()); r.Close(); // Now open writer on the commit just before optimize, // but this time keeping only the last commit: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), new KeepOnlyLastCommitDeletionPolicy(), IndexWriter.MaxFieldLength.LIMITED, lastCommit); Assert.AreEqual(10, writer.NumDocs()); // Reader still sees optimized index, because writer // opened on the prior commit has not yet committed: r = IndexReader.Open(dir); Assert.IsTrue(r.IsOptimized()); Assert.AreEqual(10, r.NumDocs()); r.Close(); writer.Close(); // Now reader sees unoptimized index: r = IndexReader.Open(dir); Assert.IsTrue(!r.IsOptimized()); Assert.AreEqual(10, r.NumDocs()); r.Close(); dir.Close(); }
public virtual void TestForceMergeDeletes2() { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetMergePolicy(NewLogMergePolicy(50))); Document document = new Document(); FieldType customType = new FieldType(); customType.Stored = true; FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED); customType1.Tokenized = false; customType1.StoreTermVectors = true; customType1.StoreTermVectorPositions = true; customType1.StoreTermVectorOffsets = true; Field storedField = NewField("stored", "stored", customType); document.Add(storedField); Field termVectorField = NewField("termVector", "termVector", customType1); document.Add(termVectorField); Field idField = NewStringField("id", "", Field.Store.NO); document.Add(idField); for (int i = 0; i < 98; i++) { idField.StringValue = "" + i; writer.AddDocument(document); } writer.Dispose(); IndexReader ir = DirectoryReader.Open(dir); Assert.AreEqual(98, ir.MaxDoc); Assert.AreEqual(98, ir.NumDocs); ir.Dispose(); IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES); writer = new IndexWriter(dir, dontMergeConfig); for (int i = 0; i < 98; i += 2) { writer.DeleteDocuments(new Term("id", "" + i)); } writer.Dispose(); ir = DirectoryReader.Open(dir); Assert.AreEqual(49, ir.NumDocs); ir.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy(3))); Assert.AreEqual(49, writer.NumDocs()); writer.ForceMergeDeletes(); writer.Dispose(); ir = DirectoryReader.Open(dir); Assert.AreEqual(49, ir.MaxDoc); Assert.AreEqual(49, ir.NumDocs); ir.Dispose(); dir.Dispose(); }
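TestForceMergeDeletes2 shows MaxDoc shrinking back to NumDocs once deletes are merged away. The older API exposes the same behaviour through ExpungeDeletes; a hedged sketch (field name and doc counts are illustrative, and method-vs-property shapes vary between Lucene.Net versions):

var dir = new RAMDirectory();
var writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
for (int i = 0; i < 10; i++)
{
    var doc = new Document();
    doc.Add(new Field("id", i.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
    writer.AddDocument(doc);
}
writer.Commit();
writer.DeleteDocuments(new Term("id", "3"));
writer.Commit();
Console.WriteLine("{0}/{1}", writer.NumDocs(), writer.MaxDoc()); // 9/10: the deleted slot is still counted by MaxDoc
writer.ExpungeDeletes();                                         // merge away the segments' deleted docs
Console.WriteLine("{0}/{1}", writer.NumDocs(), writer.MaxDoc()); // 9/9
writer.Dispose();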
public virtual void TestFlushDocCount() { int[] numThreads = new int[] { 2 + AtLeast(1), 1 }; for (int i = 0; i < numThreads.Length; i++) { int numDocumentsToIndex = 50 + AtLeast(30); AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); Directory dir = NewDirectory(); MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetFlushPolicy(flushPolicy); int numDWPT = 1 + AtLeast(2); DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT); iwc.SetIndexerThreadPool(threadPool); iwc.SetMaxBufferedDocs(2 + AtLeast(10)); iwc.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); IndexWriter writer = new IndexWriter(dir, iwc); flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy; Assert.IsTrue(flushPolicy.FlushOnDocCount()); Assert.IsFalse(flushPolicy.FlushOnDeleteTerms()); Assert.IsFalse(flushPolicy.FlushOnRAM()); DocumentsWriter docsWriter = writer.DocsWriter; Assert.IsNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.FlushControl; Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init"); IndexThread[] threads = new IndexThread[numThreads[i]]; for (int x = 0; x < threads.Length; x++) { threads[x] = new IndexThread(this, numDocs, numThreads[i], writer, LineDocFile, false); threads[x].Start(); } for (int x = 0; x < threads.Length; x++) { threads[x].Join(); } Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due numThreads=" + numThreads[i]); Assert.AreEqual(numDocumentsToIndex, writer.NumDocs()); Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc); Assert.IsTrue(flushPolicy.PeakDocCountWithoutFlush <= iwc.MaxBufferedDocs, "peak bytes without flush exceeded watermark"); AssertActiveBytesAfter(flushControl); writer.Dispose(); Assert.AreEqual(0, flushControl.ActiveBytes()); dir.Dispose(); } }
public virtual void TestOpenPriorSnapshot()
{
    // Never deletes a commit
    KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);

    Directory dir = new MockRAMDirectory();
    policy.dir = dir;

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(2);
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer);
        if ((1 + i) % 2 == 0)
        {
            writer.Commit();
        }
    }
    writer.Close();

    System.Collections.ICollection commits = IndexReader.ListCommits(dir);
    Assert.AreEqual(6, commits.Count);
    IndexCommit lastCommit = null;
    System.Collections.IEnumerator it = commits.GetEnumerator();
    while (it.MoveNext())
    {
        IndexCommit commit = (IndexCommit)it.Current;
        if (lastCommit == null || commit.GetGeneration() > lastCommit.GetGeneration())
        {
            lastCommit = commit;
        }
    }
    Assert.IsTrue(lastCommit != null);

    // Now add 1 doc and optimize
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
    AddDoc(writer);
    Assert.AreEqual(11, writer.NumDocs());
    writer.Optimize();
    writer.Close();

    Assert.AreEqual(7, IndexReader.ListCommits(dir).Count);

    // Now open writer on the commit just before optimize:
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit);
    Assert.AreEqual(10, writer.NumDocs());

    // Should undo our rollback:
    writer.Rollback();

    IndexReader r = IndexReader.Open(dir);
    // Still optimized, still 11 docs
    Assert.IsTrue(r.IsOptimized());
    Assert.AreEqual(11, r.NumDocs());
    r.Close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit);
    Assert.AreEqual(10, writer.NumDocs());
    // Commits the rollback:
    writer.Close();

    // Now 8 because we made another commit
    Assert.AreEqual(8, IndexReader.ListCommits(dir).Count);

    r = IndexReader.Open(dir);
    // Not optimized because we rolled it back, and now only 10 docs
    Assert.IsTrue(!r.IsOptimized());
    Assert.AreEqual(10, r.NumDocs());
    r.Close();

    // Reoptimize
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
    writer.Optimize();
    writer.Close();

    r = IndexReader.Open(dir);
    Assert.IsTrue(r.IsOptimized());
    Assert.AreEqual(10, r.NumDocs());
    r.Close();

    // Now open writer on the commit just before optimize,
    // but this time keeping only the last commit:
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), new KeepOnlyLastCommitDeletionPolicy(), IndexWriter.MaxFieldLength.LIMITED, lastCommit);
    Assert.AreEqual(10, writer.NumDocs());

    // Reader still sees optimized index, because writer
    // opened on the prior commit has not yet committed:
    r = IndexReader.Open(dir);
    Assert.IsTrue(r.IsOptimized());
    Assert.AreEqual(10, r.NumDocs());
    r.Close();

    writer.Close();

    // Now reader sees unoptimized index:
    r = IndexReader.Open(dir);
    Assert.IsTrue(!r.IsOptimized());
    Assert.AreEqual(10, r.NumDocs());
    r.Close();

    dir.Close();
}
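The prior-snapshot test depends on one core rule: Rollback() discards everything added since the last Commit(), and only committed state is visible to readers. A much smaller sketch of just that rule, using the Lucene.Net 3.0.3-style API and no custom deletion policy (the field values are placeholders):

using System;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Version = Lucene.Net.Util.Version;

static class CommitRollbackSketch
{
    public static void Run()
    {
        var dir = new RAMDirectory();
        var writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);

        var doc = new Document();
        doc.Add(new Field("id", "committed", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
        writer.Commit(); // first commit point: 1 document

        var doc2 = new Document();
        doc2.Add(new Field("id", "uncommitted", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc2);
        Console.WriteLine("writer.NumDocs() before rollback: {0}", writer.NumDocs()); // 2 (includes buffered doc)

        writer.Rollback(); // closes the writer and drops the uncommitted document

        var reader = IndexReader.Open(dir, true);
        Console.WriteLine("reader.NumDocs() after rollback: {0}", reader.NumDocs()); // 1
        reader.Dispose();
        dir.Dispose();
    }
}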
public virtual void TestRandom()
{
    int numThreads = 1 + Random().Next(8);
    int numDocumentsToIndex = 50 + AtLeast(70);
    AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
    iwc.SetFlushPolicy(flushPolicy);

    int numDWPT = 1 + Random().Next(8);
    DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
    iwc.SetIndexerThreadPool(threadPool);

    IndexWriter writer = new IndexWriter(dir, iwc);
    flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
    DocumentsWriter docsWriter = writer.DocsWriter;
    Assert.IsNotNull(docsWriter);
    DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;
    Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init");

    IndexThread[] threads = new IndexThread[numThreads];
    for (int x = 0; x < threads.Length; x++)
    {
        threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, true);
        threads[x].Start();
    }
    for (int x = 0; x < threads.Length; x++)
    {
        threads[x].Join();
    }

    Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due");
    Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
    Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
    if (flushPolicy.FlushOnRAM() && !flushPolicy.FlushOnDocCount() && !flushPolicy.FlushOnDeleteTerms())
    {
        long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);
        Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
        if (flushPolicy.HasMarkedPending)
        {
            Assert.IsTrue(maxRAMBytes <= flushControl.PeakActiveBytes, "max: " + maxRAMBytes + " " + flushControl.PeakActiveBytes);
        }
    }
    AssertActiveBytesAfter(flushControl);
    writer.Commit();
    Assert.AreEqual(0, flushControl.ActiveBytes());

    IndexReader r = DirectoryReader.Open(dir);
    Assert.AreEqual(numDocumentsToIndex, r.NumDocs);
    Assert.AreEqual(numDocumentsToIndex, r.MaxDoc);
    if (!flushPolicy.FlushOnRAM())
    {
        Assert.IsFalse(docsWriter.FlushControl.StallControl.WasStalled(), "never stall if we don't flush on RAM");
        Assert.IsFalse(docsWriter.FlushControl.StallControl.HasBlocked(), "never block if we don't flush on RAM");
    }
    r.Dispose();
    writer.Dispose();
    dir.Dispose();
}
// Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>());

public virtual void RunTest(string testName)
{
    Failed.Set(false);
    AddCount.Set(0);
    DelCount.Set(0);
    PackCount.Set(0);

    DateTime t0 = DateTime.UtcNow;
    Random random = new Random(Random().Next());
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
    DirectoryInfo tempDir = CreateTempDir(testName);
    Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
    if (Dir is BaseDirectoryWrapper)
    {
        ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
    }
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

    if (LuceneTestCase.TEST_NIGHTLY)
    {
        // newIWConfig makes smallish max seg size, which
        // results in tons and tons of segments for this test
        // when run nightly:
        MergePolicy mp = conf.MergePolicy;
        if (mp is TieredMergePolicy)
        {
            ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
        }
        else if (mp is LogByteSizeMergePolicy)
        {
            ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
        }
        else if (mp is LogMergePolicy)
        {
            ((LogMergePolicy)mp).MaxMergeDocs = 100000;
        }
    }

    conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

    if (VERBOSE)
    {
        conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
    }

    Writer = new IndexWriter(Dir, conf);
    TestUtil.ReduceOpenFiles(Writer);

    //TaskScheduler es = Random().NextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
    TaskScheduler es = null;

    DoAfterWriter(es);

    int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);
    int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

    ISet<string> delIDs = new ConcurrentHashSet<string>(new HashSet<string>());
    ISet<string> delPackIDs = new ConcurrentHashSet<string>(new HashSet<string>());
    IList<SubDocs> allSubDocs = new SynchronizedCollection<SubDocs>();

    DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

    ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }

    // Let index build up a bit
    Thread.Sleep(100);

    DoSearching(es, stopTime);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }

    for (int thread = 0; thread < indexThreads.Length; thread++)
    {
        indexThreads[thread].Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
    }

    IndexSearcher s = FinalSearcher;
    if (VERBOSE)
    {
        Console.WriteLine("TEST: finalSearcher=" + s);
    }

    Assert.IsFalse(Failed.Get());

    bool doFail = false;

    // Verify: make sure delIDs are in fact deleted:
    foreach (string id in delIDs)
    {
        TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
        if (hits.TotalHits != 0)
        {
            Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
            doFail = true;
        }
    }

    // Verify: make sure delPackIDs are in fact deleted:
    foreach (string id in delPackIDs)
    {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
        if (hits.TotalHits != 0)
        {
            Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
            doFail = true;
        }
    }

    // Verify: make sure each group of sub-docs are still in docID order:
    foreach (SubDocs subDocs in allSubDocs.ToList())
    {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
        if (!subDocs.Deleted)
        {
            // We sort by relevance but the scores should be identical so sort falls back to by docID:
            if (hits.TotalHits != subDocs.SubIDs.Count)
            {
                Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                doFail = true;
            }
            else
            {
                int lastDocID = -1;
                int startDocID = -1;
                foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                {
                    int docID = scoreDoc.Doc;
                    if (lastDocID != -1)
                    {
                        Assert.AreEqual(1 + lastDocID, docID);
                    }
                    else
                    {
                        startDocID = docID;
                    }
                    lastDocID = docID;
                    Document doc = s.Doc(docID);
                    Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                }

                lastDocID = startDocID - 1;
                foreach (string subID in subDocs.SubIDs)
                {
                    hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    int docID = hits.ScoreDocs[0].Doc;
                    if (lastDocID != -1)
                    {
                        Assert.AreEqual(1 + lastDocID, docID);
                    }
                    lastDocID = docID;
                }
            }
        }
        else
        {
            // Pack was deleted -- make sure its docs are deleted.
            // We can't verify packID is deleted because we can re-use packID for update:
            foreach (string subID in subDocs.SubIDs)
            {
                Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
            }
        }
    }

    // Verify: make sure all not-deleted docs are in fact not deleted:
    int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));
    docs.Dispose();

    for (int id = 0; id < endID; id++)
    {
        string stringID = "" + id;
        if (!delIDs.Contains(stringID))
        {
            TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
            if (hits.TotalHits != 1)
            {
                Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + delIDs);
                doFail = true;
            }
        }
    }
    Assert.IsFalse(doFail);

    Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);
    ReleaseSearcher(s);

    Writer.Commit();

    Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

    DoClose();
    Writer.Dispose(false);

    // Cannot shutdown until after writer is closed because
    // writer has merged segment warmer that uses IS to run
    // searches, and that IS may be using this es!
    /*
    if (es != null)
    {
        es.shutdown();
        es.awaitTermination(1, TimeUnit.SECONDS);
    }
    */

    TestUtil.CheckIndex(Dir);
    Dir.Dispose();
    System.IO.Directory.Delete(tempDir.FullName, true);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }
}
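The test above cross-checks NumDocs against its add/delete counters after searching an index that is still being written. A single-threaded sketch of the same idea using a near-real-time reader, written against the Lucene.Net 3.0.3-style API and independent of the test harness above (field names and counts are illustrative):

using System;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Version = Lucene.Net.Util.Version;

static class NrtReaderSketch
{
    public static void Run()
    {
        var dir = new RAMDirectory();
        var writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
        int added = 0, deleted = 0;

        for (int i = 0; i < 100; i++)
        {
            var doc = new Document();
            doc.Add(new Field("docid", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
            added++;
        }
        writer.DeleteDocuments(new Term("docid", "7"));
        deleted++;

        // GetReader() gives a near-real-time view without requiring a Commit().
        IndexReader reader = writer.GetReader();
        var searcher = new IndexSearcher(reader);
        Console.WriteLine("reader.NumDocs()={0}, expected {1}", reader.NumDocs(), added - deleted);
        Console.WriteLine("hits for deleted doc: {0}",
            searcher.Search(new TermQuery(new Term("docid", "7")), 1).TotalHits); // 0

        searcher.Dispose();
        reader.Dispose();
        writer.Dispose();
        dir.Dispose();
    }
}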
static void Do()
{
    //var directory = new SimpleFSDirectory(new DirectoryInfo(@"c:\temp\lucene"));
    using (var connection = new SqlConnection(@"MultipleActiveResultSets=True;Data Source=(localdb)\v11.0;Initial Catalog=TestLucene;Integrated Security=True;Connect Timeout=30;Encrypt=False;TrustServerCertificate=False"))
    {
        connection.Open();
        var directory = new SqlServerDirectory(connection, new Options() { SchemaName = "[search]" });

        for (int outer = 0; outer < 1000; outer++)
        {
            IndexWriter indexWriter = null;
            while (indexWriter == null)
            {
                try
                {
                    indexWriter = new IndexWriter(directory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), !IndexReader.IndexExists(directory), new Lucene.Net.Index.IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
                }
                catch (LockObtainFailedException)
                {
                    Console.WriteLine("Lock is taken, waiting for timeout...");
                    Thread.Sleep(1000);
                }
            }

            Console.WriteLine("IndexWriter lock obtained, this process has exclusive write access to index");
            indexWriter.SetRAMBufferSizeMB(100.0);
            indexWriter.SetInfoStream(new StreamWriter(Console.OpenStandardOutput()));
            indexWriter.UseCompoundFile = false;

            for (int iDoc = 0; iDoc < 1000; iDoc++)
            {
                if (iDoc % 10 == 0)
                    Console.WriteLine(iDoc);
                Document doc = new Document();
                doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                doc.Add(new Field("Title", "dog " + GeneratePhrase(50), Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO));
                doc.Add(new Field("Body", "dog " + GeneratePhrase(50), Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO));
                indexWriter.AddDocument(doc);
            }
            Console.WriteLine("Total docs is {0}", indexWriter.NumDocs());

            Console.Write("Flushing and disposing writer...");
            indexWriter.Flush(true, true, true);
            indexWriter.Dispose();
        }

        IndexSearcher searcher;
        using (new AutoStopWatch("Creating searcher"))
        {
            searcher = new IndexSearcher(directory);
        }
        using (new AutoStopWatch("Count"))
            Console.WriteLine("Number of docs: {0}", searcher.IndexReader.NumDocs());

        while (true)
        {
            SearchForPhrase(searcher, "microsoft");
            Thread.Sleep(1000);
            //Console.WriteLine("Press a key to search again");
            //Console.ReadKey();
        }
    }
}
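The example above busy-waits until the index write lock becomes available. The same retry pattern in isolation, against a plain FSDirectory rather than the SQL-backed directory, as a sketch; the path and the one-second retry delay are arbitrary choices:

using System;
using System.IO;
using System.Threading;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Version = Lucene.Net.Util.Version;

static class WriterLockRetrySketch
{
    public static IndexWriter OpenWriterWithRetry(Lucene.Net.Store.Directory directory)
    {
        while (true)
        {
            try
            {
                // Create a new index only if none exists yet; otherwise append.
                return new IndexWriter(directory,
                    new StandardAnalyzer(Version.LUCENE_30),
                    !IndexReader.IndexExists(directory),
                    IndexWriter.MaxFieldLength.UNLIMITED);
            }
            catch (LockObtainFailedException)
            {
                // Another process (or a crashed one) still holds write.lock; wait and retry.
                Console.WriteLine("Lock is taken, waiting before retry...");
                Thread.Sleep(1000);
            }
        }
    }

    public static void Run()
    {
        var dir = FSDirectory.Open(new DirectoryInfo(@"c:\temp\lucene"));
        using (IndexWriter writer = OpenWriterWithRetry(dir))
        {
            Console.WriteLine("Write lock obtained; NumDocs={0}", writer.NumDocs());
        }
        dir.Dispose();
    }
}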
protected internal virtual void RunFlushByRam(int numThreads, double maxRamMB, bool ensureNotStalled)
{
    int numDocumentsToIndex = 10 + AtLeast(30);
    AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
    Directory dir = NewDirectory();
    MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetFlushPolicy(flushPolicy);

    int numDWPT = 1 + AtLeast(2);
    DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
    iwc.SetIndexerThreadPool(threadPool);
    iwc.SetRAMBufferSizeMB(maxRamMB);
    iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    IndexWriter writer = new IndexWriter(dir, iwc);
    flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
    Assert.IsFalse(flushPolicy.FlushOnDocCount());
    Assert.IsFalse(flushPolicy.FlushOnDeleteTerms());
    Assert.IsTrue(flushPolicy.FlushOnRAM());
    DocumentsWriter docsWriter = writer.DocsWriter;
    Assert.IsNotNull(docsWriter);
    DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;
    Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init");

    IndexThread[] threads = new IndexThread[numThreads];
    for (int x = 0; x < threads.Length; x++)
    {
        threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, false);
        threads[x].Start();
    }
    for (int x = 0; x < threads.Length; x++)
    {
        threads[x].Join();
    }

    long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);
    Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due numThreads=" + numThreads);
    Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
    Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
    Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
    AssertActiveBytesAfter(flushControl);
    if (flushPolicy.HasMarkedPending)
    {
        Assert.IsTrue(maxRAMBytes < flushControl.PeakActiveBytes);
    }
    if (ensureNotStalled)
    {
        Assert.IsFalse(docsWriter.FlushControl.StallControl.WasStalled());
    }
    writer.Dispose();
    Assert.AreEqual(0, flushControl.ActiveBytes());
    dir.Dispose();
}
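The flush-policy tests above exercise the internal DocumentsWriterFlushControl directly. Through the public API the same trade-off reduces to two settings: flush by RAM usage or by buffered-document count, with the other trigger disabled. A sketch against the Lucene.Net 3.0.3-style writer API; the 32 MB and 1000-document thresholds are arbitrary examples:

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Version = Lucene.Net.Util.Version;

static class FlushTriggerSketch
{
    public static IndexWriter FlushByRam(Directory dir)
    {
        var writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.SetRAMBufferSizeMB(32.0);                           // flush whenever ~32 MB of buffered documents accumulate
        writer.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH); // never flush by document count
        return writer;
    }

    public static IndexWriter FlushByDocCount(Directory dir)
    {
        var writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.SetMaxBufferedDocs(1000);                           // flush every 1000 buffered documents
        writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); // never flush by RAM usage
        return writer;
    }
}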
public virtual void TestExpungeDeletes2()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(2);
    writer.SetMergeFactor(50);
    writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);

    Document document = new Document();
    Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
    document.Add(storedField);
    Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    document.Add(termVectorField);
    for (int i = 0; i < 98; i++)
        writer.AddDocument(document);
    writer.Close();

    IndexReader ir = IndexReader.Open(dir);
    Assert.AreEqual(98, ir.MaxDoc());
    Assert.AreEqual(98, ir.NumDocs());
    for (int i = 0; i < 98; i += 2)
        ir.DeleteDocument(i);
    Assert.AreEqual(49, ir.NumDocs());
    ir.Close();

    writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMergeFactor(3);
    Assert.AreEqual(49, writer.NumDocs());
    writer.ExpungeDeletes();
    writer.Close();

    ir = IndexReader.Open(dir);
    Assert.AreEqual(49, ir.MaxDoc());
    Assert.AreEqual(49, ir.NumDocs());
    ir.Close();
    dir.Close();
}
public void WriteIndex()
{
    CreateLuceneIndexFolder();
    IList<GeoName> canadianDestinations = GetDestinations("CA");
    FSDirectory indexDirectory = FSDirectory.Open(new DirectoryInfo(_luceneIndexPath));
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
    var writer = new IndexWriter(indexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    var stopwatch = new Stopwatch();
    stopwatch.Start();
    foreach (GeoName canadianDestination in canadianDestinations)
    {
        var doc = new Document();
        doc.Add(new Field("Asciiname", canadianDestination.Asciiname, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("FeatureClass", canadianDestination.FeatureClass, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("FeatureCode", canadianDestination.FeatureCode, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("Latitude", NumericUtils.DoubleToPrefixCoded(canadianDestination.Latitude), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("Longitude", NumericUtils.DoubleToPrefixCoded(canadianDestination.Longitude), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("Timezone", canadianDestination.Timezone, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("LastModified", DateTools.DateToString(DateTime.Now, DateTools.Resolution.HOUR), Field.Store.YES, Field.Index.NOT_ANALYZED));

        int ctpsize = _ctps.Count;
        for (int j = 0; j < ctpsize; j++)
        {
            CartesianTierPlotter ctp = _ctps[j];
            double boxId = ctp.GetTierBoxId(canadianDestination.Latitude, canadianDestination.Longitude);
            doc.Add(new Field(ctp.GetTierFieldName(), NumericUtils.DoubleToPrefixCoded(boxId), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
        }
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Commit();
    stopwatch.Stop();
    Debug.WriteLine("Duration: " + stopwatch.Elapsed.TotalSeconds + " seconds");
    Debug.WriteLine("Number of indexed documents: " + writer.NumDocs());
    writer.Close(); // release the write lock once the index is built
}
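A sketch of reading that index back: open a read-only searcher over the same folder and query the analyzed Asciiname field written above. This is not part of the original example; the path parameter, the hit count, and the helper class name are placeholders, and only the field names ("Asciiname", "Timezone") come from the indexing code above:

using System;
using System.IO;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Version = Lucene.Net.Util.Version;

static class DestinationSearchSketch
{
    public static void Search(string luceneIndexPath, string destinationName)
    {
        FSDirectory indexDirectory = FSDirectory.Open(new DirectoryInfo(luceneIndexPath));
        var searcher = new IndexSearcher(indexDirectory, true); // read-only searcher
        var parser = new QueryParser(Version.LUCENE_29, "Asciiname", new StandardAnalyzer(Version.LUCENE_29));
        Query query = parser.Parse(destinationName);

        TopDocs hits = searcher.Search(query, 10);
        Console.WriteLine("{0} hits for '{1}'", hits.TotalHits, destinationName);
        foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
        {
            Document doc = searcher.Doc(scoreDoc.Doc);
            Console.WriteLine("{0} ({1})", doc.Get("Asciiname"), doc.Get("Timezone"));
        }

        searcher.Dispose();
        indexDirectory.Dispose();
    }
}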