NumDocs() public method

Returns the total number of documents in this index, including documents not yet flushed (still in the RAM buffer), with deletions taken into account (deleted documents are excluded from the count). NOTE: buffered deletions that have not yet been flushed are not counted. If you need them reflected in the count, call Commit() first.
public NumDocs ( ) : int
return int
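
A minimal sketch of this behavior, assuming the Lucene.Net 3.0.3-style API (RAMDirectory, WhitespaceAnalyzer, IndexWriter.MaxFieldLength) that several of the samples below use. The method name NumDocsSketch, the "id" field, and the counts in the comments are illustrative only; exact signatures vary between Lucene.Net versions.

using System;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;

        private static void NumDocsSketch()
        {
            var dir = new RAMDirectory();
            var writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            for (int i = 0; i < 10; i++)
            {
                var doc = new Document();
                doc.Add(new Field("id", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc); // still buffered in RAM, but counted by NumDocs()
            }
            // Unflushed docs are included: writer.NumDocs() == 10, writer.MaxDoc() == 10

            writer.DeleteDocuments(new Term("id", "0"));
            // The delete is only buffered, so it is not reflected yet: writer.NumDocs() is still 10

            writer.Commit();
            Console.WriteLine("NumDocs=" + writer.NumDocs());
            // After Commit() the buffered delete is applied: NumDocs() == 9 is expected here,
            // while MaxDoc() would still report 10 (MaxDoc counts deleted docs until they are merged away)

            writer.Dispose();
            dir.Dispose();
        }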
Example #1
        private static RAMDirectory MakeEmptyIndex(int numDeletedDocs)
        {
            RAMDirectory d = new RAMDirectory();
            IndexWriter  w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED, null);

            for (int i = 0; i < numDeletedDocs; i++)
            {
                w.AddDocument(new Document(), null);
            }
            w.Commit(null);
            w.DeleteDocuments(null, new MatchAllDocsQuery());
            w.Commit(null);

            if (0 < numDeletedDocs)
            {
                Assert.IsTrue(w.HasDeletions(null), "writer has no deletions");
            }

            Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs");
            Assert.AreEqual(0, w.NumDocs(null), "writer has non-deleted docs");
            w.Close();
            IndexReader r = IndexReader.Open((Directory)d, true, null);

            Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs");
            r.Close();
            return(d);
        }
Example #2
        public virtual void TestIndexWriterDirtSimple()
        {
            Directory         dir = new RAMDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            TieredMergePolicy tmp = NewTieredMergePolicy();

            iwc.SetMergePolicy(tmp);
            iwc.SetMaxBufferedDocs(2);
            tmp.MaxMergeAtOnce              = 100;
            tmp.SegmentsPerTier             = 100;
            tmp.ForceMergeDeletesPctAllowed = 30.0;
            IndexWriter w = new IndexWriter(dir, iwc);

            int numDocs = 2;

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("content", "aaa " + i, Field.Store.NO));
                w.AddDocument(doc);
            }

            Assert.AreEqual(numDocs, w.MaxDoc);
            Assert.AreEqual(numDocs, w.NumDocs());
        }
Example #3
        public virtual void RunTest(Random random, Directory directory)
        {
            IndexWriter writer = new IndexWriter(directory, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, ANALYZER).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2)).SetMergePolicy(NewLogMergePolicy()));

            for (int iter = 0; iter < NUM_ITER; iter++)
            {
                int iterFinal = iter;

                ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor = 1000;

                FieldType customType = new FieldType(StringField.TYPE_STORED);
                customType.OmitNorms = true;

                for (int i = 0; i < 200; i++)
                {
                    Document d = new Document();
                    d.Add(NewField("id", Convert.ToString(i), customType));
                    d.Add(NewField("contents", English.IntToEnglish(i), customType));
                    writer.AddDocument(d);
                }

                ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor = 4;

                ThreadClass[] threads = new ThreadClass[NUM_THREADS];

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    int         iFinal      = i;
                    IndexWriter writerFinal = writer;
                    threads[i] = new ThreadAnonymousInnerClassHelper(this, iterFinal, customType, iFinal, writerFinal);
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Start();
                }

                for (int i = 0; i < NUM_THREADS; i++)
                {
                    threads[i].Join();
                }

                Assert.IsTrue(!Failed);

                int expectedDocCount = (int)((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS)));

                Assert.AreEqual(expectedDocCount, writer.NumDocs(), "index=" + writer.SegString() + " numDocs=" + writer.NumDocs() + " maxDoc=" + writer.MaxDoc + " config=" + writer.Config);
                Assert.AreEqual(expectedDocCount, writer.MaxDoc, "index=" + writer.SegString() + " numDocs=" + writer.NumDocs() + " maxDoc=" + writer.MaxDoc + " config=" + writer.Config);

                writer.Dispose();
                writer = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, ANALYZER).SetOpenMode(OpenMode_e.APPEND).SetMaxBufferedDocs(2));

                DirectoryReader reader = DirectoryReader.Open(directory);
                Assert.AreEqual(1, reader.Leaves.Count, "reader=" + reader);
                Assert.AreEqual(expectedDocCount, reader.NumDocs);
                reader.Dispose();
            }
            writer.Dispose();
        }
Example #4
        public virtual void TestForceMergeDeletes()
        {
            Directory         dir  = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            TieredMergePolicy tmp  = NewTieredMergePolicy();

            conf.SetMergePolicy(tmp);
            conf.SetMaxBufferedDocs(4);
            tmp.MaxMergeAtOnce              = 100;
            tmp.SegmentsPerTier             = 100;
            tmp.ForceMergeDeletesPctAllowed = 30.0;
            IndexWriter w = new IndexWriter(dir, conf);

            for (int i = 0; i < 80; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("content", "aaa " + (i % 4), Field.Store.NO));
                w.AddDocument(doc);
            }
            Assert.AreEqual(80, w.MaxDoc);
            Assert.AreEqual(80, w.NumDocs());

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: delete docs");
            }
            w.DeleteDocuments(new Term("content", "0"));
            w.ForceMergeDeletes();

            Assert.AreEqual(80, w.MaxDoc);
            Assert.AreEqual(60, w.NumDocs());

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: forceMergeDeletes2");
            }
            ((TieredMergePolicy)w.Config.MergePolicy).ForceMergeDeletesPctAllowed = 10.0;
            w.ForceMergeDeletes();
            Assert.AreEqual(60, w.NumDocs());
            Assert.AreEqual(60, w.MaxDoc);
            w.Dispose();
            dir.Dispose();
        }
Example #6
        public void TestDeleteByTermIsCurrent()
        {
            // get reader
            IndexReader reader = writer.GetReader();

            // assert the index has a document and the reader is up to date
            Assert.AreEqual(1, writer.NumDocs(), "One document should be in the index");
            Assert.IsTrue(reader.IsCurrent(), "Document added, reader should be current");

            // remove document
            Term idTerm = new Term("UUID", "1");

            writer.DeleteDocuments(idTerm);
            writer.Commit();

            // assert document has been deleted (index changed), reader is stale
            Assert.AreEqual(0, writer.NumDocs(), "Document should be removed");
            Assert.IsFalse(reader.IsCurrent(), "Reader should be stale");

            reader.Close();
        }
Example #7
        public virtual void TestStallControl()
        {
            int[] numThreads          = new int[] { 4 + Random().Next(8), 1 };
            int   numDocumentsToIndex = 50 + Random().Next(50);

            for (int i = 0; i < numThreads.Length; i++)
            {
                AtomicInteger        numDocs = new AtomicInteger(numDocumentsToIndex);
                MockDirectoryWrapper dir     = NewMockDirectory();
                // mock a very slow harddisk sometimes here so that flushing is very slow
                dir.Throttling = MockDirectoryWrapper.Throttling_e.SOMETIMES;
                IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
                iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy();
                iwc.SetFlushPolicy(flushPolicy);

                DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numThreads[i] == 1 ? 1 : 2);
                iwc.SetIndexerThreadPool(threadPool);
                // with such a small RAM buffer we should be stalled quite quickly
                iwc.SetRAMBufferSizeMB(0.25);
                IndexWriter   writer  = new IndexWriter(dir, iwc);
                IndexThread[] threads = new IndexThread[numThreads[i]];
                for (int x = 0; x < threads.Length; x++)
                {
                    threads[x] = new IndexThread(this, numDocs, numThreads[i], writer, LineDocFile, false);
                    threads[x].Start();
                }

                for (int x = 0; x < threads.Length; x++)
                {
                    threads[x].Join();
                }
                DocumentsWriter docsWriter = writer.DocsWriter;
                Assert.IsNotNull(docsWriter);
                DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;
                Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due");
                Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
                Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc());
                if (numThreads[i] == 1)
                {
                    Assert.IsFalse("single thread must not block numThreads: " + numThreads[i], docsWriter.FlushControl.StallControl.HasBlocked());
                }
                if (docsWriter.FlushControl.PeakNetBytes > (2d * iwc.RAMBufferSizeMB * 1024d * 1024d))
                {
                    Assert.IsTrue(docsWriter.FlushControl.StallControl.WasStalled());
                }
                AssertActiveBytesAfter(flushControl);
                writer.Dispose(true);
                dir.Dispose();
            }
        }
        public virtual void ChangeIndexWithAdds(Random random, Directory dir, string origOldName)
        {
            // open writer
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.APPEND).SetMergePolicy(NewLogMergePolicy()));

            // add 10 docs
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, 35 + i);
            }

            // make sure writer sees right total -- writer seems not to know about deletes in .del?
            int expected;

            if (Compare(origOldName, "24") < 0)
            {
                expected = 44;
            }
            else
            {
                expected = 45;
            }
            Assert.AreEqual(expected, writer.NumDocs(), "wrong doc count");
            writer.Dispose();

            // make sure searching sees right # hits
            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Document   d    = searcher.IndexReader.Document(hits[0].Doc);

            assertEquals("wrong first document", "0", d.Get("id"));
            DoTestHits(hits, 44, searcher.IndexReader);
            reader.Dispose();

            // fully merge
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.APPEND).SetMergePolicy(NewLogMergePolicy()));
            writer.ForceMerge(1);
            writer.Dispose();

            reader   = DirectoryReader.Open(dir);
            searcher = NewSearcher(reader);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(44, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            DoTestHits(hits, 44, searcher.IndexReader);
            assertEquals("wrong first document", "0", d.Get("id"));
            reader.Dispose();
        }
Example #9
        static void Main(string[] args)
        {
            
            // default AzureDirectory stores cache in local temp folder
            var azureDirectory = new AzureDirectory(CloudStorageAccount.Parse(ConfigurationManager.AppSettings["blobStorage"]), "TestCatalog6");
            var findexExists = IndexReader.IndexExists(azureDirectory);

            IndexWriter indexWriter = null;
            while (indexWriter == null)
            {
                try
                {
                    indexWriter = new IndexWriter(azureDirectory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), !IndexReader.IndexExists(azureDirectory), new Lucene.Net.Index.IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
                }
                catch (LockObtainFailedException)
                {
                    Console.WriteLine("Lock is taken, Hit 'Y' to clear the lock, or anything else to try again");
                    if (Console.ReadLine().ToLower().Trim() == "y" )
                        azureDirectory.ClearLock("write.lock");
                }
            };
            Console.WriteLine("IndexWriter lock obtained, this process has exclusive write access to index");
            indexWriter.SetRAMBufferSizeMB(10.0);
            //indexWriter.SetUseCompoundFile(false);
            //indexWriter.SetMaxMergeDocs(10000);
            //indexWriter.SetMergeFactor(100);
            
            for (int iDoc = 0; iDoc < 10000; iDoc++)
            {
                if (iDoc % 10 == 0)
                    Console.WriteLine(iDoc);
                var doc = new Document();
                doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                doc.Add(new Field("Title", GeneratePhrase(10), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                doc.Add(new Field("Body", GeneratePhrase(40), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                indexWriter.AddDocument(doc);
            }
            Console.WriteLine("Total docs is {0}", indexWriter.NumDocs());
            indexWriter.Dispose();

            IndexSearcher searcher;
            using (new AutoStopWatch("Creating searcher"))
            {
                searcher = new IndexSearcher(azureDirectory); 
            }
            SearchForPhrase(searcher, "dog");
            SearchForPhrase(searcher, _random.Next(32768).ToString());
            SearchForPhrase(searcher, _random.Next(32768).ToString());
            Console.Read();
        }
Example #10
        public virtual void TestFlushDocCount()
        {
            int[] numThreads = new int[] { 2 + AtLeast(1), 1 };
            for (int i = 0; i < numThreads.Length; i++)
            {
                int                    numDocumentsToIndex = 50 + AtLeast(30);
                AtomicInteger          numDocs             = new AtomicInteger(numDocumentsToIndex);
                Directory              dir         = NewDirectory();
                MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
                IndexWriterConfig      iwc         = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetFlushPolicy(flushPolicy);

                int numDWPT = 1 + AtLeast(2);
                DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
                iwc.SetIndexerThreadPool(threadPool);
                iwc.SetMaxBufferedDocs(2 + AtLeast(10));
                iwc.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                IndexWriter writer = new IndexWriter(dir, iwc);
                flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
                Assert.IsTrue(flushPolicy.FlushOnDocCount());
                Assert.IsFalse(flushPolicy.FlushOnDeleteTerms());
                Assert.IsFalse(flushPolicy.FlushOnRAM());
                DocumentsWriter docsWriter = writer.DocsWriter;
                Assert.IsNotNull(docsWriter);
                DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;
                Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init");

                IndexThread[] threads = new IndexThread[numThreads[i]];
                for (int x = 0; x < threads.Length; x++)
                {
                    threads[x] = new IndexThread(this, numDocs, numThreads[i], writer, LineDocFile, false);
                    threads[x].Start();
                }

                for (int x = 0; x < threads.Length; x++)
                {
                    threads[x].Join();
                }

                Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due numThreads=" + numThreads[i]);
                Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
                Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc());
                Assert.IsTrue(flushPolicy.PeakDocCountWithoutFlush <= iwc.MaxBufferedDocs, "peak doc count without flush exceeded watermark");
                AssertActiveBytesAfter(flushControl);
                writer.Dispose();
                Assert.AreEqual(0, flushControl.ActiveBytes());
                dir.Dispose();
            }
        }
        protected override Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken)
        {
            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
            analyzer.AddAnalyzer("Id", new IdentifierKeywordAnalyzer());

            int i = 0;

            using (IndexWriter writer = new IndexWriter(_directory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (JObject item in items)
                {
                    i++;

                    string id = item["nuget:id"].ToString();
                    string version = item["nuget:version"].ToString();

                    BooleanQuery query = new BooleanQuery();
                    query.Add(new BooleanClause(new TermQuery(new Term("Id", id.ToLowerInvariant())), Occur.MUST));
                    query.Add(new BooleanClause(new TermQuery(new Term("Version", version)), Occur.MUST));

                    writer.DeleteDocuments(query);

                    Document doc = new Document();

                    doc.Add(new Field("Id", item["nuget:id"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Version", item["nuget:version"].ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                    writer.AddDocument(doc);
                }

                string trace = Guid.NewGuid().ToString();

                writer.Commit(new Dictionary<string, string> 
                { 
                    { "commitTimeStamp", commitTimeStamp.ToString("O") },
                    { "trace", trace }
                });

                Trace.TraceInformation("COMMIT {0} documents, index contains {1} documents, commitTimeStamp {2}, trace: {3}",
                    i, writer.NumDocs(), commitTimeStamp.ToString("O"), trace);
            }

            return Task.FromResult(true);
        }
        public virtual void TestIndexWriter_LUCENE4656()
        {
            Store.Directory directory = NewDirectory();
            IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));

            TokenStream ts = new EmptyTokenStream();
            assertFalse(ts.HasAttribute<ITermToBytesRefAttribute>());

            Document doc = new Document();
            doc.Add(new StringField("id", "0", Field.Store.YES));
            doc.Add(new TextField("description", ts));

            // this should not fail because we have no TermToBytesRefAttribute
            writer.AddDocument(doc);

            assertEquals(1, writer.NumDocs());

            writer.Dispose();
            directory.Dispose();
        }
        public void TestReadAndWrite()
        {
            var connectionString = Environment.GetEnvironmentVariable("DataConnectionString") ?? "UseDevelopmentStorage=true";

            var cloudStorageAccount = CloudStorageAccount.Parse(connectionString);

            // default AzureDirectory stores cache in local temp folder
            var azureDirectory = new AzureDirectory(cloudStorageAccount, "testcatalog");

            using (var indexWriter = new IndexWriter(azureDirectory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), !IndexReader.IndexExists(azureDirectory), new Lucene.Net.Index.IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH)))
            {
                indexWriter.SetRAMBufferSizeMB(10.0);

                for (int iDoc = 0; iDoc < 10000; iDoc++)
                {
                    var doc = new Document();
                    doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString() + "-" + iDoc.ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                    doc.Add(new Field("Title", GeneratePhrase(10), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                    doc.Add(new Field("Body", GeneratePhrase(40), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                    indexWriter.AddDocument(doc);
                }

                Console.WriteLine("Total docs is {0}", indexWriter.NumDocs());
            }

            using (var searcher = new IndexSearcher(azureDirectory))
            {
                Assert.AreNotEqual(0, SearchForPhrase(searcher, "dog"));
                Assert.AreNotEqual(0, SearchForPhrase(searcher, "cat"));
                Assert.AreNotEqual(0, SearchForPhrase(searcher, "car"));
            }

            // check the container exists, and delete it
            var blobClient = cloudStorageAccount.CreateCloudBlobClient();
            var container = blobClient.GetContainerReference("testcatalog");
            Assert.IsTrue(container.Exists()); // check the container exists
            container.Delete();
        }
Example #14
        public virtual void TestRollingUpdates_Mem()
        {
            Random random = new Random(Random().Next());
            BaseDirectoryWrapper dir = NewDirectory();
            LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());

            //provider.register(new MemoryCodec());
            if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean())
            {
                Codec.Default =
                    TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(Random().nextBoolean(), random.NextFloat()));
            }

            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            int SIZE = AtLeast(20);
            int id = 0;
            IndexReader r = null;
            IndexSearcher s = null;
            int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble())));
            if (VERBOSE)
            {
                Console.WriteLine("TEST: numUpdates=" + numUpdates);
            }
            int updateCount = 0;
            // TODO: sometimes update ids not in order...
            for (int docIter = 0; docIter < numUpdates; docIter++)
            {
                Documents.Document doc = docs.NextDoc();
                string myID = "" + id;
                if (id == SIZE - 1)
                {
                    id = 0;
                }
                else
                {
                    id++;
                }
                if (VERBOSE)
                {
                    Console.WriteLine("  docIter=" + docIter + " id=" + id);
                }
                ((Field)doc.GetField("docid")).StringValue = myID;

                Term idTerm = new Term("docid", myID);

                bool doUpdate;
                if (s != null && updateCount < SIZE)
                {
                    TopDocs hits = s.Search(new TermQuery(idTerm), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
                    if (VERBOSE)
                    {
                        if (doUpdate)
                        {
                            Console.WriteLine("  tryDeleteDocument failed");
                        }
                        else
                        {
                            Console.WriteLine("  tryDeleteDocument succeeded");
                        }
                    }
                }
                else
                {
                    doUpdate = true;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  no searcher: doUpdate=true");
                    }
                }

                updateCount++;

                if (doUpdate)
                {
                    w.UpdateDocument(idTerm, doc);
                }
                else
                {
                    w.AddDocument(doc);
                }

                if (docIter >= SIZE && Random().Next(50) == 17)
                {
                    if (r != null)
                    {
                        r.Dispose();
                    }

                    bool applyDeletions = Random().NextBoolean();

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
                    }

                    r = w.GetReader(applyDeletions);
                    if (applyDeletions)
                    {
                        s = NewSearcher(r);
                    }
                    else
                    {
                        s = null;
                    }
                    Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
                    updateCount = 0;
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            w.Commit();
            Assert.AreEqual(SIZE, w.NumDocs());

            w.Dispose();

            TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");

            docs.Dispose();

            // LUCENE-4455:
            SegmentInfos infos = new SegmentInfos();
            infos.Read(dir);
            long totalBytes = 0;
            foreach (SegmentCommitInfo sipc in infos.Segments)
            {
                totalBytes += sipc.SizeInBytes();
            }
            long totalBytes2 = 0;
            foreach (string fileName in dir.ListAll())
            {
                if (!fileName.StartsWith(IndexFileNames.SEGMENTS))
                {
                    totalBytes2 += dir.FileLength(fileName);
                }
            }
            Assert.AreEqual(totalBytes2, totalBytes);
            dir.Dispose();
        }
Example #15
        public virtual void TestFutureCommit()
        {
            Directory dir = NewDirectory();

            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));
            Document doc = new Document();
            w.AddDocument(doc);

            // commit to "first"
            IDictionary<string, string> commitData = new Dictionary<string, string>();
            commitData["tag"] = "first";
            w.CommitData = commitData;
            w.Commit();

            // commit to "second"
            w.AddDocument(doc);
            commitData["tag"] = "second";
            w.CommitData = commitData;
            w.Dispose();

            // open "first" with IndexWriter
            IndexCommit commit = null;
            foreach (IndexCommit c in DirectoryReader.ListCommits(dir))
            {
                if (c.UserData["tag"].Equals("first"))
                {
                    commit = c;
                    break;
                }
            }

            Assert.IsNotNull(commit);

            w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE).SetIndexCommit(commit));

            Assert.AreEqual(1, w.NumDocs());

            // commit IndexWriter to "third"
            w.AddDocument(doc);
            commitData["tag"] = "third";
            w.CommitData = commitData;
            w.Dispose();

            // make sure "second" commit is still there
            commit = null;
            foreach (IndexCommit c in DirectoryReader.ListCommits(dir))
            {
                if (c.UserData["tag"].Equals("second"))
                {
                    commit = c;
                    break;
                }
            }

            Assert.IsNotNull(commit);

            dir.Dispose();
        }
Example #16
		public virtual void  TestDocCount()
		{
			Directory dir = new RAMDirectory();
			
			IndexWriter writer = null;
			IndexReader reader = null;
			int i;
			
			IndexWriter.SetDefaultWriteLockTimeout(2000);
			Assert.AreEqual(2000, IndexWriter.GetDefaultWriteLockTimeout());
			
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
			
			IndexWriter.SetDefaultWriteLockTimeout(1000);
			
			// add 100 documents
			for (i = 0; i < 100; i++)
			{
				AddDoc(writer);
			}
			Assert.AreEqual(100, writer.DocCount());
			writer.Close();
			
			// delete 40 documents
			reader = IndexReader.Open(dir);
			for (i = 0; i < 40; i++)
			{
				reader.DeleteDocument(i);
			}
			reader.Close();
			
			// test doc count before segments are merged/index is optimized
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
			Assert.AreEqual(100, writer.DocCount());
			writer.Close();
			
			reader = IndexReader.Open(dir);
			Assert.AreEqual(100, reader.MaxDoc());
			Assert.AreEqual(60, reader.NumDocs());
			reader.Close();
			
			// optimize the index and check that the new doc count is correct
			writer = new IndexWriter(dir, true, new WhitespaceAnalyzer());
			Assert.AreEqual(100, writer.MaxDoc());
			Assert.AreEqual(60, writer.NumDocs());
			writer.Optimize();
			Assert.AreEqual(60, writer.MaxDoc());
			Assert.AreEqual(60, writer.NumDocs());
			writer.Close();
			
			// check that the index reader gives the same numbers.
			reader = IndexReader.Open(dir);
			Assert.AreEqual(60, reader.MaxDoc());
			Assert.AreEqual(60, reader.NumDocs());
			reader.Close();
			
			// make sure opening a new index for create over
			// this existing one works correctly:
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Assert.AreEqual(0, writer.MaxDoc());
			Assert.AreEqual(0, writer.NumDocs());
			writer.Close();
		}
Example #17
        public void TestFutureCommit()
        {
            Directory dir = new MockRAMDirectory();

            IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), new NoDeletionPolicy(), IndexWriter.MaxFieldLength.UNLIMITED);
            Document doc = new Document();
            w.AddDocument(doc);

            // commit to "first"
            System.Collections.Generic.Dictionary<string, string> commitData = new System.Collections.Generic.Dictionary<string, string>();
            commitData["tag"]="first";
            w.Commit(commitData);

            // commit to "second"
            w.AddDocument(doc);
            commitData["tag"]="second";
            w.Commit(commitData);
            w.Close();

            // open "first" with IndexWriter
            IndexCommit commit = null;
            System.Collections.IEnumerator it = IndexReader.ListCommits(dir).GetEnumerator();
            while (it.MoveNext())
            {
                IndexCommit c = (IndexCommit)it.Current;
                string tag = (String)c.GetUserData()["tag"];
                if ("first".Equals(tag))
                {
                    commit = c;
                    break;
                }
            }

            Assert.NotNull(commit);

            w = new IndexWriter(dir, new WhitespaceAnalyzer(), new NoDeletionPolicy(), IndexWriter.MaxFieldLength.UNLIMITED, commit);

            Assert.AreEqual(1, w.NumDocs());

            // commit IndexWriter to "third"
            w.AddDocument(doc);
            commitData["tag"]="third";
            w.Commit(commitData);
            w.Close();

            // make sure "second" commit is still there
            commit = null;
            it = IndexReader.ListCommits(dir).GetEnumerator();
            while (it.MoveNext())
            {
                IndexCommit c = (IndexCommit)it.Current;
                string tag = (String)c.GetUserData()["tag"];
                if ("second".Equals(tag))
                {
                    commit = c;
                    break;
                }
            }

            Assert.NotNull(commit);

            IndexReader r = IndexReader.Open(commit, true);
            Assert.AreEqual(2, r.NumDocs());
            r.Close();

            // open "second", w/ writeable IndexReader & commit
            r = IndexReader.Open(commit, new NoDeletionPolicy(), false);
            Assert.AreEqual(2, r.NumDocs());
            r.DeleteDocument(0);
            r.DeleteDocument(1);
            commitData["tag"]="fourth";
            r.Commit(commitData);
            r.Close();

            // make sure "third" commit is still there
            commit = null;
            it = IndexReader.ListCommits(dir).GetEnumerator();
            while (it.MoveNext())
            {
                IndexCommit c = (IndexCommit)it.Current;
                string tag = (String)c.GetUserData()["tag"];
                if ("third".Equals(tag))
                {
                    commit = c;
                    break;
                }
            }
            Assert.NotNull(commit);

            dir.Close();
        }
        public virtual void TestRandom()
        {
            int numThreads = 1 + Random().Next(8);
            int numDocumentsToIndex = 50 + AtLeast(70);
            AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
            iwc.SetFlushPolicy(flushPolicy);

            int numDWPT = 1 + Random().Next(8);
            DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
            iwc.SetIndexerThreadPool(threadPool);

            IndexWriter writer = new IndexWriter(dir, iwc);
            flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
            DocumentsWriter docsWriter = writer.DocsWriter;
            Assert.IsNotNull(docsWriter);
            DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;

            Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init");

            IndexThread[] threads = new IndexThread[numThreads];
            for (int x = 0; x < threads.Length; x++)
            {
                threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, true);
                threads[x].Start();
            }

            for (int x = 0; x < threads.Length; x++)
            {
                threads[x].Join();
            }
            Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due");
            Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
            Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
            if (flushPolicy.FlushOnRAM() && !flushPolicy.FlushOnDocCount() && !flushPolicy.FlushOnDeleteTerms())
            {
                long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);
                Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
                if (flushPolicy.HasMarkedPending)
                {
                    assertTrue("max: " + maxRAMBytes + " " + flushControl.PeakActiveBytes, maxRAMBytes <= flushControl.PeakActiveBytes);
                }
            }
            AssertActiveBytesAfter(flushControl);
            writer.Commit();
            Assert.AreEqual(0, flushControl.ActiveBytes());
            IndexReader r = DirectoryReader.Open(dir);
            Assert.AreEqual(numDocumentsToIndex, r.NumDocs);
            Assert.AreEqual(numDocumentsToIndex, r.MaxDoc);
            if (!flushPolicy.FlushOnRAM())
            {
                assertFalse("never stall if we don't flush on RAM", docsWriter.FlushControl.StallControl.WasStalled());
                assertFalse("never block if we don't flush on RAM", docsWriter.FlushControl.StallControl.HasBlocked());
            }
            r.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
        //[HttpPost]
        //public ActionResult GetAutoComplete(Models.Search model)
        //{
        //    var result = PerformAutoCompleteLookup(model.Phrase, this.IndexDirectory);
        //    var facetMap = result.FacetMap;
        //    var titleFacets = facetMap["title"];
        //    var facetVals = titleFacets.GetFacets();
        //    foreach (var facet in facetVals)
        //    {
        //        model.Results.Add(facet.ToString());
        //    }
        //    model.Phrase = string.Empty;
        //    model.SelectionGroups.Clear();
        //    model.FacetGroups.Clear();
        //    return Json(model, "application/json");
        //}
        private void IndexProducts(string indexPath)
        {
            DateTime startIndexing = DateTime.Now;
            Console.WriteLine("start indexing at: " + startIndexing);

            // read in the products XML
            var productsXml = new XmlDocument();

            string productDataPath = Server.MapPath("~/ProductData/Products.xml");

            productsXml.Load(productDataPath);

            // create the indexer with a standard analyzer
            //var indexWriter = new IndexWriter(indexPath, new StandardAnalyzer(), true);

            var directory = FSDirectory.Open(new System.IO.DirectoryInfo(indexPath));
            bool recreateIndex = true;
            //Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29); // get analyzer

            ISynonymEngine engine = new SpecialSynonymEngine();
            Analyzer analyzer = new SynonymAnalyzer(Lucene.Net.Util.Version.LUCENE_29, engine);

            var indexWriter = new IndexWriter(
                directory,
                analyzer,
                recreateIndex,
                IndexWriter.MaxFieldLength.UNLIMITED);

            try
            {
                // loop through all the products in Products.xml
                foreach (XPathNavigator product in productsXml.CreateNavigator().Select("//product"))
                {
                    // create a Lucene document for this product
                    var doc = new Document();

                    // add the ID as stored but not indexed field, not used to query on
                    //doc.Add(new Field("id", product.GetAttribute("id", string.Empty), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));

                    // add the title as a stored and analyzed field; the analyzer processes the content
                    doc.Add(new Field("title", product.SelectSingleNode("title").Value, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                    //doc.Add(new Field("Title2", product.SelectSingleNode("title").Value, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
                    //doc.Add(new Field("description", product.SelectSingleNode("description").Value, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));

                    // add the product properties as stored, not-analyzed fields; the values are stored as-is
                    doc.Add(new Field("Material", product.SelectSingleNode("properties//material").Value, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
                    doc.Add(new Field("Style", product.SelectSingleNode("properties//style").Value, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
                    doc.Add(new Field("Mounting", product.SelectSingleNode("properties//mounting").Value, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
                    doc.Add(new Field("Brand", product.SelectSingleNode("properties//brand").Value, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));

                    // add the publication date as stored and un tokenized field, note the special date handling
                    //DateTime publicationDate = DateTime.Parse(product.SelectSingleNode("publish_date").Value, CultureInfo.InvariantCulture);
                    //doc.Add(new Field("publicationDate", DateField.DateToString(publicationDate), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));

                    // add the document to the index
                    indexWriter.AddDocument(doc);
                }

                // optimize the index to speed up searches
                indexWriter.Optimize();
            }
            finally
            {
                // report the document count before disposing; NumDocs() cannot be called on a disposed writer
                Console.WriteLine("Number of indexed documents: " + indexWriter.NumDocs());

                // close the index writer
                indexWriter.Dispose();
            }

            DateTime endIndexing = DateTime.Now;
            Console.WriteLine("end indexing at: " + endIndexing);
            Console.WriteLine("Duration: " + (endIndexing - startIndexing).Seconds + " seconds");
        }
        // Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>());

        public virtual void RunTest(string testName)
        {
            Failed.Set(false);
            AddCount.Set(0);
            DelCount.Set(0);
            PackCount.Set(0);

            DateTime t0 = DateTime.UtcNow;

            Random random = new Random(Random().Next());
            LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
            DirectoryInfo tempDir = CreateTempDir(testName);
            Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
            if (Dir is BaseDirectoryWrapper)
            {
                ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
            }
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

            if (LuceneTestCase.TEST_NIGHTLY)
            {
                // newIWConfig makes smallish max seg size, which
                // results in tons and tons of segments for this test
                // when run nightly:
                MergePolicy mp = conf.MergePolicy;
                if (mp is TieredMergePolicy)
                {
                    ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
                }
                else if (mp is LogByteSizeMergePolicy)
                {
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
                }
                else if (mp is LogMergePolicy)
                {
                    ((LogMergePolicy)mp).MaxMergeDocs = 100000;
                }
            }

            conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

            if (VERBOSE)
            {
                conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
            }
            Writer = new IndexWriter(Dir, conf);
            TestUtil.ReduceOpenFiles(Writer);

            TaskScheduler es = Random().NextBoolean() ? null : TaskScheduler.Default;

            DoAfterWriter(es);

            int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);

            int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

            ISet<string> delIDs = new ConcurrentHashSet<string>(new HashSet<string>());
            ISet<string> delPackIDs = new ConcurrentHashSet<string>(new HashSet<string>());
            IList<SubDocs> allSubDocs = new SynchronizedCollection<SubDocs>();

            DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

            ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            // Let index build up a bit
            Thread.Sleep(100);

            DoSearching(es, stopTime);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            for (int thread = 0; thread < indexThreads.Length; thread++)
            {
                indexThreads[thread].Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
            }

            IndexSearcher s = FinalSearcher;
            if (VERBOSE)
            {
                Console.WriteLine("TEST: finalSearcher=" + s);
            }

            Assert.IsFalse(Failed.Get());

            bool doFail = false;

            // Verify: make sure delIDs are in fact deleted:
            foreach (string id in delIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
                    doFail = true;
                }
            }

            // Verify: make sure delPackIDs are in fact deleted:
            foreach (string id in delPackIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
                    doFail = true;
                }
            }

            // Verify: make sure each group of sub-docs are still in docID order:
            foreach (SubDocs subDocs in allSubDocs.ToList())
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
                if (!subDocs.Deleted)
                {
                    // We sort by relevance but the scores should be identical so sort falls back to by docID:
                    if (hits.TotalHits != subDocs.SubIDs.Count)
                    {
                        Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                        doFail = true;
                    }
                    else
                    {
                        int lastDocID = -1;
                        int startDocID = -1;
                        foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                        {
                            int docID = scoreDoc.Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            else
                            {
                                startDocID = docID;
                            }
                            lastDocID = docID;
                            Document doc = s.Doc(docID);
                            Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                        }

                        lastDocID = startDocID - 1;
                        foreach (string subID in subDocs.SubIDs)
                        {
                            hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                            Assert.AreEqual(1, hits.TotalHits);
                            int docID = hits.ScoreDocs[0].Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            lastDocID = docID;
                        }
                    }
                }
                else
                {
                    // Pack was deleted -- make sure its docs are
                    // deleted.  We can't verify packID is deleted
                    // because we can re-use packID for update:
                    foreach (string subID in subDocs.SubIDs)
                    {
                        Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
                    }
                }
            }

            // Verify: make sure all not-deleted docs are in fact
            // not deleted:
            int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));
            docs.Dispose();

            for (int id = 0; id < endID; id++)
            {
                string stringID = "" + id;
                if (!delIDs.Contains(stringID))
                {
                    TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
                    if (hits.TotalHits != 1)
                    {
                        Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + string.Join(",",  delIDs.ToArray()));
                        doFail = true;
                    }
                }
            }
            Assert.IsFalse(doFail);

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);
            ReleaseSearcher(s);

            Writer.Commit();

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

            DoClose();
            Writer.Dispose(false);

            // Cannot shutdown until after writer is closed because
            // writer has merged segment warmer that uses IS to run
            // searches, and that IS may be using this es!
            /*if (es != null)
            {
              es.shutdown();
              es.awaitTermination(1, TimeUnit.SECONDS);
            }*/

            TestUtil.CheckIndex(Dir);
            Dir.Dispose();
            System.IO.Directory.Delete(tempDir.FullName, true);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }
        }
Exemplo n.º 23
0
        public virtual void TestOpenPriorSnapshot()
        {
            Directory dir = NewDirectory();

            IndexWriter           writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(new KeepAllDeletionPolicy(this, dir)).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(10)));
            KeepAllDeletionPolicy policy = (KeepAllDeletionPolicy)writer.Config.DelPolicy;

            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer);
                if ((1 + i) % 2 == 0)
                {
                    writer.Commit();
                }
            }
            writer.Dispose();

            ICollection <IndexCommit> commits = DirectoryReader.ListCommits(dir);

            Assert.AreEqual(5, commits.Count);
            IndexCommit lastCommit = null;

            foreach (IndexCommit commit in commits)
            {
                if (lastCommit == null || commit.Generation > lastCommit.Generation)
                {
                    lastCommit = commit;
                }
            }
            Assert.IsTrue(lastCommit != null);

            // Now add 1 doc and merge
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy));
            AddDoc(writer);
            Assert.AreEqual(11, writer.NumDocs());
            writer.ForceMerge(1);
            writer.Dispose();

            Assert.AreEqual(6, DirectoryReader.ListCommits(dir).Count);

            // Now open writer on the commit just before merge:
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit));
            Assert.AreEqual(10, writer.NumDocs());

            // Should undo our rollback:
            writer.Rollback();

            DirectoryReader r = DirectoryReader.Open(dir);

            // Still merged, still 11 docs
            Assert.AreEqual(1, r.Leaves.Count);
            Assert.AreEqual(11, r.NumDocs);
            r.Dispose();

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit));
            Assert.AreEqual(10, writer.NumDocs());
            // Commits the rollback:
            writer.Dispose();

            // Now 7 because we made another commit
            Assert.AreEqual(7, DirectoryReader.ListCommits(dir).Count);

            r = DirectoryReader.Open(dir);
            // Not fully merged because we rolled it back, and now only
            // 10 docs
            Assert.IsTrue(r.Leaves.Count > 1);
            Assert.AreEqual(10, r.NumDocs);
            r.Dispose();

            // Re-merge
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy));
            writer.ForceMerge(1);
            writer.Dispose();

            r = DirectoryReader.Open(dir);
            Assert.AreEqual(1, r.Leaves.Count);
            Assert.AreEqual(10, r.NumDocs);
            r.Dispose();

            // Now open writer on the commit just before merging,
            // but this time keeping only the last commit:
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexCommit(lastCommit));
            Assert.AreEqual(10, writer.NumDocs());

            // Reader still sees fully merged index, because writer
            // opened on the prior commit has not yet committed:
            r = DirectoryReader.Open(dir);
            Assert.AreEqual(1, r.Leaves.Count);
            Assert.AreEqual(10, r.NumDocs);
            r.Dispose();

            writer.Dispose();

            // Now reader sees not-fully-merged index:
            r = DirectoryReader.Open(dir);
            Assert.IsTrue(r.Leaves.Count > 1);
            Assert.AreEqual(10, r.NumDocs);
            r.Dispose();

            dir.Dispose();
        }
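The pattern above can be reduced to a few lines. The following is a minimal sketch, not one of the scraped examples: it assumes the same test helpers used above (NewDirectory, NewIndexWriterConfig, MockAnalyzer, Random, TEST_VERSION_CURRENT), the test's own KeepAllDeletionPolicy, and an existing dir that already holds several commits. It shows the core idea: pick an older commit, open a writer on it, read NumDocs() as of that commit, and Rollback() so the later commits stay untouched.

        // Sketch only: find the oldest surviving commit and open a writer on it.
        IndexCommit oldest = null;
        foreach (IndexCommit c in DirectoryReader.ListCommits(dir))
        {
            if (oldest == null || c.Generation < oldest.Generation)
            {
                oldest = c; // earliest commit the deletion policy has kept alive
            }
        }
        IndexWriter older = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(new KeepAllDeletionPolicy(this, dir)).SetIndexCommit(oldest));
        int docsAtThatCommit = older.NumDocs(); // doc count as of the older commit
        older.Rollback();                       // abandon this session; later commits remain intact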
        protected internal virtual void RunFlushByRam(int numThreads, double maxRamMB, bool ensureNotStalled)
        {
            int numDocumentsToIndex = 10 + AtLeast(30);
            AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
            Directory dir = NewDirectory();
            MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetFlushPolicy(flushPolicy);
            int numDWPT = 1 + AtLeast(2);
            DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
            iwc.SetIndexerThreadPool(threadPool);
            iwc.SetRAMBufferSizeMB(maxRamMB);
            iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            IndexWriter writer = new IndexWriter(dir, iwc);
            flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
            Assert.IsFalse(flushPolicy.FlushOnDocCount());
            Assert.IsFalse(flushPolicy.FlushOnDeleteTerms());
            Assert.IsTrue(flushPolicy.FlushOnRAM());
            DocumentsWriter docsWriter = writer.DocsWriter;
            Assert.IsNotNull(docsWriter);
            DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;
            Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init");

            IndexThread[] threads = new IndexThread[numThreads];
            for (int x = 0; x < threads.Length; x++)
            {
                threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, false);
                threads[x].Start();
            }

            for (int x = 0; x < threads.Length; x++)
            {
                threads[x].Join();
            }
            long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);
            Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due numThreads=" + numThreads);
            Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
            Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
            Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
            AssertActiveBytesAfter(flushControl);
            if (flushPolicy.HasMarkedPending)
            {
                Assert.IsTrue(maxRAMBytes < flushControl.PeakActiveBytes);
            }
            if (ensureNotStalled)
            {
                Assert.IsFalse(docsWriter.FlushControl.StallControl.WasStalled());
            }
            writer.Dispose();
            Assert.AreEqual(0, flushControl.ActiveBytes());
            dir.Dispose();
        }
        public virtual void TestStallControl()
        {

            int[] numThreads = new int[] { 4 + Random().Next(8), 1 };
            int numDocumentsToIndex = 50 + Random().Next(50);
            for (int i = 0; i < numThreads.Length; i++)
            {
                AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
                MockDirectoryWrapper dir = NewMockDirectory();
                // mock a very slow hard disk sometimes here so that flushing is very slow
                dir.Throttling = MockDirectoryWrapper.Throttling_e.SOMETIMES;
                IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
                iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy();
                iwc.SetFlushPolicy(flushPolicy);

                DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numThreads[i] == 1 ? 1 : 2);
                iwc.SetIndexerThreadPool(threadPool);
                // with such a small RAM buffer we should be stalled quite quickly
                iwc.SetRAMBufferSizeMB(0.25);
                IndexWriter writer = new IndexWriter(dir, iwc);
                IndexThread[] threads = new IndexThread[numThreads[i]];
                for (int x = 0; x < threads.Length; x++)
                {
                    threads[x] = new IndexThread(this, numDocs, numThreads[i], writer, LineDocFile, false);
                    threads[x].Start();
                }

                for (int x = 0; x < threads.Length; x++)
                {
                    threads[x].Join();
                }
                DocumentsWriter docsWriter = writer.DocsWriter;
                Assert.IsNotNull(docsWriter);
                DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;
                Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due");
                Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
                Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
                if (numThreads[i] == 1)
                {
                    assertFalse("single thread must not block numThreads: " + numThreads[i], docsWriter.FlushControl.StallControl.HasBlocked());
                }
                if (docsWriter.FlushControl.PeakNetBytes > (2d * iwc.RAMBufferSizeMB * 1024d * 1024d))
                {
                    Assert.IsTrue(docsWriter.FlushControl.StallControl.WasStalled());
                }
                AssertActiveBytesAfter(flushControl);
                writer.Dispose(true);
                dir.Dispose();
            }
        }
Exemplo n.º 26
0
        public virtual void TestExistingDeletes()
        {
            Directory[] dirs = new Directory[2];
            for (int i = 0; i < dirs.Length; i++)
            {
                dirs[i] = NewDirectory();
                IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
                IndexWriter writer = new IndexWriter(dirs[i], conf);
                Document doc = new Document();
                doc.Add(new StringField("id", "myid", Field.Store.NO));
                writer.AddDocument(doc);
                writer.Dispose();
            }

            IndexWriterConfig conf_ = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter writer_ = new IndexWriter(dirs[0], conf_);

            // Now delete the document
            writer_.DeleteDocuments(new Term("id", "myid"));
            IndexReader r = DirectoryReader.Open(dirs[1]);
            try
            {
                writer_.AddIndexes(r);
            }
            finally
            {
                r.Dispose();
            }
            writer_.Commit();
            Assert.AreEqual(1, writer_.NumDocs(), "Documents from the incoming index should not have been deleted");
            writer_.Dispose();

            foreach (Directory dir in dirs)
            {
                dir.Dispose();
            }
        }
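A related point, shown here as a minimal sketch rather than one of the scraped tests (it assumes the same NewDirectory, NewIndexWriterConfig and MockAnalyzer helpers used above): a buffered delete only applies to documents that were present in the writer when DeleteDocuments was called; documents added afterwards, including those brought in by AddIndexes, are not affected, which is consistent with the assert above.

        // Sketch only: the delete is buffered before the second add, so only the first copy is removed.
        Directory dir2 = NewDirectory();
        IndexWriter w2 = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
        Document d = new Document();
        d.Add(new StringField("id", "myid", Field.Store.NO));
        w2.AddDocument(d);                            // first copy
        w2.DeleteDocuments(new Term("id", "myid"));   // buffered delete; only affects the copy above
        w2.AddDocument(d);                            // second copy, added after the delete
        w2.Commit();
        Assert.AreEqual(1, w2.NumDocs());             // only the copy added after the delete is live
        w2.Dispose();
        dir2.Dispose();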
Exemplo n.º 27
0
        public virtual void TestFlushExceptions()
        {
            MockDirectoryWrapper directory = NewMockDirectory();
            FailOnlyOnFlush      failure   = new FailOnlyOnFlush(this);

            directory.FailOn(failure);

            IndexWriter writer  = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2));
            Document    doc     = new Document();
            Field       idField = NewStringField("id", "", Field.Store.YES);

            doc.Add(idField);
            int extraCount = 0;

            for (int i = 0; i < 10; i++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + i);
                }

                for (int j = 0; j < 20; j++)
                {
                    idField.StringValue = Convert.ToString(i * 20 + j);
                    writer.AddDocument(doc);
                }

                // must cycle here because sometimes the merge flushes
                // the doc we just added and so there's nothing to
                // flush, and we don't hit the exception
                while (true)
                {
                    writer.AddDocument(doc);
                    failure.SetDoFail();
                    try
                    {
                        writer.Flush(true, true);
                        if (failure.HitExc)
                        {
                            Assert.Fail("failed to hit IOException");
                        }
                        extraCount++;
                    }
                    catch (IOException ioe)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine(ioe.StackTrace);
                        }
                        failure.ClearDoFail();
                        break;
                    }
                }
                Assert.AreEqual(20 * (i + 1) + extraCount, writer.NumDocs());
            }

            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(directory);

            Assert.AreEqual(200 + extraCount, reader.NumDocs);
            reader.Dispose();
            directory.Dispose();
        }
Exemplo n.º 28
0
        public virtual void TestDeleteNullQuery()
        {
            Directory dir = NewDirectory();
            IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));

            for (int i = 0; i < 5; i++)
            {
                AddDoc(modifier, i, 2 * i);
            }

            modifier.DeleteDocuments(new TermQuery(new Term("nada", "nada")));
            modifier.Commit();
            Assert.AreEqual(5, modifier.NumDocs());
            modifier.Dispose();
            dir.Dispose();
        }
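For contrast with the no-op delete above, here is a minimal sketch (not one of the scraped tests; it assumes the same test helpers) in which the delete query does match a document, so the committed count reported by NumDocs() drops by one.

        // Sketch only: a matching delete-by-query reduces the live doc count after Commit().
        Directory dir2 = NewDirectory();
        IndexWriter modifier2 = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
        for (int i = 0; i < 5; i++)
        {
            Document doc = new Document();
            doc.Add(new StringField("docid", "" + i, Field.Store.NO));
            modifier2.AddDocument(doc);
        }
        modifier2.DeleteDocuments(new TermQuery(new Term("docid", "3"))); // matches exactly one document
        modifier2.Commit();
        Assert.AreEqual(4, modifier2.NumDocs()); // 5 added, 1 deleted
        modifier2.Dispose();
        dir2.Dispose();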
Exemplo n.º 29
0
        public virtual void TestRollingUpdates_Mem()
        {
            Random random             = new Random(Random().Next());
            BaseDirectoryWrapper dir  = NewDirectory();
            LineFileDocs         docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());

            //provider.register(new MemoryCodec());
            // LUCENE TODO: uncomment this out once MemoryPostingsFormat is brought over
            //if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean())
            //{
            //    Codec.Default =
            //        TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.NextFloat()));
            //}

            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriter   w          = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            int           SIZE       = AtLeast(20);
            int           id         = 0;
            IndexReader   r          = null;
            IndexSearcher s          = null;
            int           numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble())));

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numUpdates=" + numUpdates);
            }
            int updateCount = 0;

            // TODO: sometimes update ids not in order...
            for (int docIter = 0; docIter < numUpdates; docIter++)
            {
                Documents.Document doc  = docs.NextDoc();
                string             myID = "" + id;
                if (id == SIZE - 1)
                {
                    id = 0;
                }
                else
                {
                    id++;
                }
                if (VERBOSE)
                {
                    Console.WriteLine("  docIter=" + docIter + " id=" + id);
                }
                ((Field)doc.GetField("docid")).StringValue = myID;

                Term idTerm = new Term("docid", myID);

                bool doUpdate;
                if (s != null && updateCount < SIZE)
                {
                    TopDocs hits = s.Search(new TermQuery(idTerm), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
                    if (VERBOSE)
                    {
                        if (doUpdate)
                        {
                            Console.WriteLine("  tryDeleteDocument failed");
                        }
                        else
                        {
                            Console.WriteLine("  tryDeleteDocument succeeded");
                        }
                    }
                }
                else
                {
                    doUpdate = true;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  no searcher: doUpdate=true");
                    }
                }

                updateCount++;

                if (doUpdate)
                {
                    w.UpdateDocument(idTerm, doc);
                }
                else
                {
                    w.AddDocument(doc);
                }

                if (docIter >= SIZE && Random().Next(50) == 17)
                {
                    if (r != null)
                    {
                        r.Dispose();
                    }

                    bool applyDeletions = Random().NextBoolean();

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
                    }

                    r = w.GetReader(applyDeletions);
                    if (applyDeletions)
                    {
                        s = NewSearcher(r);
                    }
                    else
                    {
                        s = null;
                    }
                    Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
                    updateCount = 0;
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            w.Commit();
            Assert.AreEqual(SIZE, w.NumDocs());

            w.Dispose();

            TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");

            docs.Dispose();

            // LUCENE-4455:
            SegmentInfos infos = new SegmentInfos();

            infos.Read(dir);
            long totalBytes = 0;

            foreach (SegmentCommitInfo sipc in infos.Segments)
            {
                totalBytes += sipc.SizeInBytes();
            }
            long totalBytes2 = 0;

            foreach (string fileName in dir.ListAll())
            {
                if (!fileName.StartsWith(IndexFileNames.SEGMENTS))
                {
                    totalBytes2 += dir.FileLength(fileName);
                }
            }
            Assert.AreEqual(totalBytes2, totalBytes);
            dir.Dispose();
        }
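The final assert above (SIZE live docs after many updates) depends on each new version replacing exactly one older copy. A minimal sketch of the simplest form of that invariant, using only UpdateDocument (not one of the scraped tests; same assumed helpers): repeatedly updating the same id never grows the live document count.

        // Sketch only: UpdateDocument is a delete-then-add on the given term.
        Directory dir2 = NewDirectory();
        IndexWriter w2 = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
        Document doc2 = new Document();
        doc2.Add(new StringField("docid", "17", Field.Store.NO));
        w2.AddDocument(doc2);
        for (int i = 0; i < 25; i++)
        {
            w2.UpdateDocument(new Term("docid", "17"), doc2); // deletes the previous copy, adds a new one
        }
        w2.Commit();
        Assert.AreEqual(1, w2.NumDocs()); // still exactly one live document
        w2.Dispose();
        dir2.Dispose();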
Exemplo n.º 30
0
		public virtual void  TestOpenPriorSnapshot()
		{
			
			// Never deletes a commit
			KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);
			
			Directory dir = new MockRAMDirectory();
			policy.dir = dir;
			
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetMaxBufferedDocs(2);
			for (int i = 0; i < 10; i++)
			{
				AddDoc(writer);
				if ((1 + i) % 2 == 0)
					writer.Commit();
			}
			writer.Close();
			
			System.Collections.ICollection commits = IndexReader.ListCommits(dir);
			Assert.AreEqual(6, commits.Count);
			IndexCommit lastCommit = null;
			System.Collections.IEnumerator it = commits.GetEnumerator();
			while (it.MoveNext())
			{
				IndexCommit commit = (IndexCommit) it.Current;
				if (lastCommit == null || commit.GetGeneration() > lastCommit.GetGeneration())
					lastCommit = commit;
			}
			Assert.IsTrue(lastCommit != null);
			
			// Now add 1 doc and optimize
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
			AddDoc(writer);
			Assert.AreEqual(11, writer.NumDocs());
			writer.Optimize();
			writer.Close();
			
			Assert.AreEqual(7, IndexReader.ListCommits(dir).Count);
			
			// Now open writer on the commit just before optimize:
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit);
			Assert.AreEqual(10, writer.NumDocs());
			
			// Should undo our rollback:
			writer.Rollback();
			
			IndexReader r = IndexReader.Open(dir);
			// Still optimized, still 11 docs
			Assert.IsTrue(r.IsOptimized());
			Assert.AreEqual(11, r.NumDocs());
			r.Close();
			
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit);
			Assert.AreEqual(10, writer.NumDocs());
			// Commits the rollback:
			writer.Close();
			
			// Now 8 because we made another commit
			Assert.AreEqual(8, IndexReader.ListCommits(dir).Count);
			
			r = IndexReader.Open(dir);
			// Not optimized because we rolled it back, and now only
			// 10 docs
			Assert.IsTrue(!r.IsOptimized());
			Assert.AreEqual(10, r.NumDocs());
			r.Close();
			
			// Reoptimize
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
			writer.Optimize();
			writer.Close();
			
			r = IndexReader.Open(dir);
			Assert.IsTrue(r.IsOptimized());
			Assert.AreEqual(10, r.NumDocs());
			r.Close();
			
			// Now open writer on the commit just before optimize,
			// but this time keeping only the last commit:
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), new KeepOnlyLastCommitDeletionPolicy(), IndexWriter.MaxFieldLength.LIMITED, lastCommit);
			Assert.AreEqual(10, writer.NumDocs());
			
			// Reader still sees optimized index, because writer
			// opened on the prior commit has not yet committed:
			r = IndexReader.Open(dir);
			Assert.IsTrue(r.IsOptimized());
			Assert.AreEqual(10, r.NumDocs());
			r.Close();
			
			writer.Close();
			
			// Now reader sees unoptimized index:
			r = IndexReader.Open(dir);
			Assert.IsTrue(!r.IsOptimized());
			Assert.AreEqual(10, r.NumDocs());
			r.Close();
			
			dir.Close();
		}
Exemplo n.º 31
0
        public virtual void TestFutureCommit()
        {
            Directory dir = NewDirectory();

            IndexWriter w   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));
            Document    doc = new Document();

            w.AddDocument(doc);

            // commit to "first"
            IDictionary <string, string> commitData = new Dictionary <string, string>();

            commitData["tag"] = "first";
            w.CommitData      = commitData;
            w.Commit();

            // commit to "second"
            w.AddDocument(doc);
            commitData["tag"] = "second";
            w.CommitData      = commitData;
            w.Dispose();

            // open "first" with IndexWriter
            IndexCommit commit = null;

            foreach (IndexCommit c in DirectoryReader.ListCommits(dir))
            {
                if (c.UserData["tag"].Equals("first"))
                {
                    commit = c;
                    break;
                }
            }

            Assert.IsNotNull(commit);

            w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE).SetIndexCommit(commit));

            Assert.AreEqual(1, w.NumDocs());

            // commit IndexWriter to "third"
            w.AddDocument(doc);
            commitData["tag"] = "third";
            w.CommitData      = commitData;
            w.Dispose();

            // make sure "second" commit is still there
            commit = null;
            foreach (IndexCommit c in DirectoryReader.ListCommits(dir))
            {
                if (c.UserData["tag"].Equals("second"))
                {
                    commit = c;
                    break;
                }
            }

            Assert.IsNotNull(commit);

            dir.Dispose();
        }
Exemplo n.º 32
0
        public virtual void TestForceMergeDeletes2()
        {
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetMergePolicy(NewLogMergePolicy(50)));

            Document document = new Document();

            FieldType customType = new FieldType();

            customType.Stored = true;

            FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED);

            customType1.Tokenized                = false;
            customType1.StoreTermVectors         = true;
            customType1.StoreTermVectorPositions = true;
            customType1.StoreTermVectorOffsets   = true;

            Field storedField = NewField("stored", "stored", customType);

            document.Add(storedField);
            Field termVectorField = NewField("termVector", "termVector", customType1);

            document.Add(termVectorField);
            Field idField = NewStringField("id", "", Field.Store.NO);

            document.Add(idField);
            for (int i = 0; i < 98; i++)
            {
                idField.StringValue = "" + i;
                writer.AddDocument(document);
            }
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);

            Assert.AreEqual(98, ir.MaxDoc);
            Assert.AreEqual(98, ir.NumDocs);
            ir.Dispose();

            IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);

            writer = new IndexWriter(dir, dontMergeConfig);
            for (int i = 0; i < 98; i += 2)
            {
                writer.DeleteDocuments(new Term("id", "" + i));
            }
            writer.Dispose();

            ir = DirectoryReader.Open(dir);
            Assert.AreEqual(49, ir.NumDocs);
            ir.Dispose();

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy(3)));
            Assert.AreEqual(49, writer.NumDocs());
            writer.ForceMergeDeletes();
            writer.Dispose();
            ir = DirectoryReader.Open(dir);
            Assert.AreEqual(49, ir.MaxDoc);
            Assert.AreEqual(49, ir.NumDocs);
            ir.Dispose();
            dir.Dispose();
        }
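The example above boils down to one effect of ForceMergeDeletes: segments holding committed deletes are rewritten without the deleted documents, so MaxDoc falls back to NumDocs. The following is a minimal self-contained sketch, not one of the scraped tests; it assumes the same test helpers and uses NewLogMergePolicy so that segments with deletions are always eligible for the forced merge.

        // Sketch only: delete half the docs, then reclaim the deleted slots.
        Directory d = NewDirectory();
        IndexWriter w = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
        for (int i = 0; i < 10; i++)
        {
            Document doc = new Document();
            doc.Add(NewStringField("id", "" + i, Field.Store.NO));
            w.AddDocument(doc);
        }
        w.Commit();
        for (int i = 0; i < 10; i += 2)
        {
            w.DeleteDocuments(new Term("id", "" + i)); // delete every other document
        }
        w.Commit();
        Assert.AreEqual(5, w.NumDocs());  // live docs once the deletes are committed
        w.ForceMergeDeletes();            // rewrite segments, dropping the deleted docs
        w.Dispose();

        IndexReader ir = DirectoryReader.Open(d);
        Assert.AreEqual(5, ir.MaxDoc);    // the deleted slots are gone
        Assert.AreEqual(5, ir.NumDocs);
        ir.Dispose();
        d.Dispose();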
        public virtual void TestFlushDocCount()
        {
            int[] numThreads = new int[] { 2 + AtLeast(1), 1 };
            for (int i = 0; i < numThreads.Length; i++)
            {

                int numDocumentsToIndex = 50 + AtLeast(30);
                AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
                Directory dir = NewDirectory();
                MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
                IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetFlushPolicy(flushPolicy);

                int numDWPT = 1 + AtLeast(2);
                DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
                iwc.SetIndexerThreadPool(threadPool);
                iwc.SetMaxBufferedDocs(2 + AtLeast(10));
                iwc.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                IndexWriter writer = new IndexWriter(dir, iwc);
                flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
                Assert.IsTrue(flushPolicy.FlushOnDocCount());
                Assert.IsFalse(flushPolicy.FlushOnDeleteTerms());
                Assert.IsFalse(flushPolicy.FlushOnRAM());
                DocumentsWriter docsWriter = writer.DocsWriter;
                Assert.IsNotNull(docsWriter);
                DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;
                Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init");

                IndexThread[] threads = new IndexThread[numThreads[i]];
                for (int x = 0; x < threads.Length; x++)
                {
                    threads[x] = new IndexThread(this, numDocs, numThreads[i], writer, LineDocFile, false);
                    threads[x].Start();
                }

                for (int x = 0; x < threads.Length; x++)
                {
                    threads[x].Join();
                }

                Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due numThreads=" + numThreads[i]);
                Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
                Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
                Assert.IsTrue(flushPolicy.PeakDocCountWithoutFlush <= iwc.MaxBufferedDocs, "peak bytes without flush exceeded watermark");
                AssertActiveBytesAfter(flushControl);
                writer.Dispose();
                Assert.AreEqual(0, flushControl.ActiveBytes());
                dir.Dispose();
            }
        }
Exemplo n.º 34
0
        public virtual void  TestOpenPriorSnapshot()
        {
            // Never deletes a commit
            KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);

            Directory dir = new MockRAMDirectory();

            policy.dir = dir;

            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetMaxBufferedDocs(2);
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer);
                if ((1 + i) % 2 == 0)
                {
                    writer.Commit();
                }
            }
            writer.Close();

            System.Collections.ICollection commits = IndexReader.ListCommits(dir);
            Assert.AreEqual(6, commits.Count);
            IndexCommit lastCommit = null;

            System.Collections.IEnumerator it = commits.GetEnumerator();
            while (it.MoveNext())
            {
                IndexCommit commit = (IndexCommit)it.Current;
                if (lastCommit == null || commit.GetGeneration() > lastCommit.GetGeneration())
                {
                    lastCommit = commit;
                }
            }
            Assert.IsTrue(lastCommit != null);

            // Now add 1 doc and optimize
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
            AddDoc(writer);
            Assert.AreEqual(11, writer.NumDocs());
            writer.Optimize();
            writer.Close();

            Assert.AreEqual(7, IndexReader.ListCommits(dir).Count);

            // Now open writer on the commit just before optimize:
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit);
            Assert.AreEqual(10, writer.NumDocs());

            // Should undo our rollback:
            writer.Rollback();

            IndexReader r = IndexReader.Open(dir);

            // Still optimized, still 11 docs
            Assert.IsTrue(r.IsOptimized());
            Assert.AreEqual(11, r.NumDocs());
            r.Close();

            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit);
            Assert.AreEqual(10, writer.NumDocs());
            // Commits the rollback:
            writer.Close();

            // Now 8 because we made another commit
            Assert.AreEqual(8, IndexReader.ListCommits(dir).Count);

            r = IndexReader.Open(dir);
            // Not optimized because we rolled it back, and now only
            // 10 docs
            Assert.IsTrue(!r.IsOptimized());
            Assert.AreEqual(10, r.NumDocs());
            r.Close();

            // Reoptimize
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
            writer.Optimize();
            writer.Close();

            r = IndexReader.Open(dir);
            Assert.IsTrue(r.IsOptimized());
            Assert.AreEqual(10, r.NumDocs());
            r.Close();

            // Now open writer on the commit just before optimize,
            // but this time keeping only the last commit:
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), new KeepOnlyLastCommitDeletionPolicy(), IndexWriter.MaxFieldLength.LIMITED, lastCommit);
            Assert.AreEqual(10, writer.NumDocs());

            // Reader still sees optimized index, because writer
            // opened on the prior commit has not yet committed:
            r = IndexReader.Open(dir);
            Assert.IsTrue(r.IsOptimized());
            Assert.AreEqual(10, r.NumDocs());
            r.Close();

            writer.Close();

            // Now reader sees unoptimized index:
            r = IndexReader.Open(dir);
            Assert.IsTrue(!r.IsOptimized());
            Assert.AreEqual(10, r.NumDocs());
            r.Close();

            dir.Close();
        }
Exemplo n.º 35
0
        public virtual void TestForceMergeDeletes2()
        {
            Directory dir = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetMergePolicy(NewLogMergePolicy(50)));

            Document document = new Document();

            FieldType customType = new FieldType();
            customType.Stored = true;

            FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED);
            customType1.Tokenized = false;
            customType1.StoreTermVectors = true;
            customType1.StoreTermVectorPositions = true;
            customType1.StoreTermVectorOffsets = true;

            Field storedField = NewField("stored", "stored", customType);
            document.Add(storedField);
            Field termVectorField = NewField("termVector", "termVector", customType1);
            document.Add(termVectorField);
            Field idField = NewStringField("id", "", Field.Store.NO);
            document.Add(idField);
            for (int i = 0; i < 98; i++)
            {
                idField.StringValue = "" + i;
                writer.AddDocument(document);
            }
            writer.Dispose();

            IndexReader ir = DirectoryReader.Open(dir);
            Assert.AreEqual(98, ir.MaxDoc());
            Assert.AreEqual(98, ir.NumDocs());
            ir.Dispose();

            IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
            writer = new IndexWriter(dir, dontMergeConfig);
            for (int i = 0; i < 98; i += 2)
            {
                writer.DeleteDocuments(new Term("id", "" + i));
            }
            writer.Dispose();

            ir = DirectoryReader.Open(dir);
            Assert.AreEqual(49, ir.NumDocs());
            ir.Dispose();

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy(3)));
            Assert.AreEqual(49, writer.NumDocs());
            writer.ForceMergeDeletes();
            writer.Dispose();
            ir = DirectoryReader.Open(dir);
            Assert.AreEqual(49, ir.MaxDoc());
            Assert.AreEqual(49, ir.NumDocs());
            ir.Dispose();
            dir.Dispose();
        }
        public virtual void TestFlushExceptions()
        {
            MockDirectoryWrapper directory = NewMockDirectory();
            FailOnlyOnFlush failure = new FailOnlyOnFlush(this);
            directory.FailOn(failure);

            IndexWriter writer = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2));
            Document doc = new Document();
            Field idField = NewStringField("id", "", Field.Store.YES);
            doc.Add(idField);
            int extraCount = 0;

            for (int i = 0; i < 10; i++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + i);
                }

                for (int j = 0; j < 20; j++)
                {
                    idField.StringValue = Convert.ToString(i * 20 + j);
                    writer.AddDocument(doc);
                }

                // must cycle here because sometimes the merge flushes
                // the doc we just added and so there's nothing to
                // flush, and we don't hit the exception
                while (true)
                {
                    writer.AddDocument(doc);
                    failure.SetDoFail();
                    try
                    {
                        writer.Flush(true, true);
                        if (failure.HitExc)
                        {
                            Assert.Fail("failed to hit IOException");
                        }
                        extraCount++;
                    }
                    catch (IOException ioe)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine(ioe.StackTrace);
                        }
                        failure.ClearDoFail();
                        break;
                    }
                }
                Assert.AreEqual(20 * (i + 1) + extraCount, writer.NumDocs());
            }

            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(directory);
            Assert.AreEqual(200 + extraCount, reader.NumDocs);
            reader.Dispose();
            directory.Dispose();
        }
Exemplo n.º 37
0
        public virtual void TestRandom()
        {
            int                    numThreads          = 1 + Random().Next(8);
            int                    numDocumentsToIndex = 50 + AtLeast(70);
            AtomicInteger          numDocs             = new AtomicInteger(numDocumentsToIndex);
            Directory              dir         = NewDirectory();
            IndexWriterConfig      iwc         = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();

            iwc.SetFlushPolicy(flushPolicy);

            int numDWPT = 1 + Random().Next(8);
            DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);

            iwc.SetIndexerThreadPool(threadPool);

            IndexWriter writer = new IndexWriter(dir, iwc);

            flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
            DocumentsWriter docsWriter = writer.DocsWriter;

            Assert.IsNotNull(docsWriter);
            DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;

            Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init");

            IndexThread[] threads = new IndexThread[numThreads];
            for (int x = 0; x < threads.Length; x++)
            {
                threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, true);
                threads[x].Start();
            }

            for (int x = 0; x < threads.Length; x++)
            {
                threads[x].Join();
            }
            Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due");
            Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
            Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc());
            if (flushPolicy.FlushOnRAM() && !flushPolicy.FlushOnDocCount() && !flushPolicy.FlushOnDeleteTerms())
            {
                long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);
                Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
                if (flushPolicy.HasMarkedPending)
                {
                    Assert.IsTrue("max: " + maxRAMBytes + " " + flushControl.PeakActiveBytes, maxRAMBytes <= flushControl.PeakActiveBytes);
                }
            }
            AssertActiveBytesAfter(flushControl);
            writer.Commit();
            Assert.AreEqual(0, flushControl.ActiveBytes());
            IndexReader r = DirectoryReader.Open(dir);

            Assert.AreEqual(numDocumentsToIndex, r.NumDocs());
            Assert.AreEqual(numDocumentsToIndex, r.MaxDoc());
            if (!flushPolicy.FlushOnRAM())
            {
                Assert.IsFalse("never stall if we don't flush on RAM", docsWriter.FlushControl.StallControl.WasStalled());
                Assert.IsFalse("never block if we don't flush on RAM", docsWriter.FlushControl.StallControl.HasBlocked());
            }
            r.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
Exemplo n.º 38
0
        // Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>());

        public virtual void RunTest(string testName)
        {
            Failed.Set(false);
            AddCount.Set(0);
            DelCount.Set(0);
            PackCount.Set(0);

            DateTime t0 = DateTime.UtcNow;

            Random        random  = new Random(Random().Next());
            LineFileDocs  docs    = new LineFileDocs(random, DefaultCodecSupportsDocValues());
            DirectoryInfo tempDir = CreateTempDir(testName);

            Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
            if (Dir is BaseDirectoryWrapper)
            {
                ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
            }
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

            if (LuceneTestCase.TEST_NIGHTLY)
            {
                // newIWConfig makes smallish max seg size, which
                // results in tons and tons of segments for this test
                // when run nightly:
                MergePolicy mp = conf.MergePolicy;
                if (mp is TieredMergePolicy)
                {
                    ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
                }
                else if (mp is LogByteSizeMergePolicy)
                {
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
                }
                else if (mp is LogMergePolicy)
                {
                    ((LogMergePolicy)mp).MaxMergeDocs = 100000;
                }
            }

            conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

            if (VERBOSE)
            {
                conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
            }
            Writer = new IndexWriter(Dir, conf);
            TestUtil.ReduceOpenFiles(Writer);

            //TaskScheduler es = Random().NextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
            TaskScheduler es = null;

            DoAfterWriter(es);

            int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);

            int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

            ISet <string>   delIDs     = new ConcurrentHashSet <string>(new HashSet <string>());
            ISet <string>   delPackIDs = new ConcurrentHashSet <string>(new HashSet <string>());
            IList <SubDocs> allSubDocs = new SynchronizedCollection <SubDocs>();

            DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

            ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            // Let index build up a bit
            Thread.Sleep(100);

            DoSearching(es, stopTime);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            for (int thread = 0; thread < indexThreads.Length; thread++)
            {
                indexThreads[thread].Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
            }

            IndexSearcher s = FinalSearcher;

            if (VERBOSE)
            {
                Console.WriteLine("TEST: finalSearcher=" + s);
            }

            Assert.IsFalse(Failed.Get());

            bool doFail = false;

            // Verify: make sure delIDs are in fact deleted:
            foreach (string id in delIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
                    doFail = true;
                }
            }

            // Verify: make sure delPackIDs are in fact deleted:
            foreach (string id in delPackIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
                    doFail = true;
                }
            }

            // Verify: make sure each group of sub-docs are still in docID order:
            foreach (SubDocs subDocs in allSubDocs.ToList())
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
                if (!subDocs.Deleted)
                {
                    // We sort by relevance but the scores should be identical so sort falls back to by docID:
                    if (hits.TotalHits != subDocs.SubIDs.Count)
                    {
                        Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                        doFail = true;
                    }
                    else
                    {
                        int lastDocID  = -1;
                        int startDocID = -1;
                        foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                        {
                            int docID = scoreDoc.Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            else
                            {
                                startDocID = docID;
                            }
                            lastDocID = docID;
                            Document doc = s.Doc(docID);
                            Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                        }

                        lastDocID = startDocID - 1;
                        foreach (string subID in subDocs.SubIDs)
                        {
                            hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                            Assert.AreEqual(1, hits.TotalHits);
                            int docID = hits.ScoreDocs[0].Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            lastDocID = docID;
                        }
                    }
                }
                else
                {
                    // Pack was deleted -- make sure its docs are
                    // deleted.  We can't verify packID is deleted
                    // because we can re-use packID for update:
                    foreach (string subID in subDocs.SubIDs)
                    {
                        Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
                    }
                }
            }

            // Verify: make sure all not-deleted docs are in fact
            // not deleted:
            int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));

            docs.Dispose();

            for (int id = 0; id < endID; id++)
            {
                string stringID = "" + id;
                if (!delIDs.Contains(stringID))
                {
                    TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
                    if (hits.TotalHits != 1)
                    {
                        Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + delIDs);
                        doFail = true;
                    }
                }
            }
            Assert.IsFalse(doFail);

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);
            ReleaseSearcher(s);

            Writer.Commit();

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

            DoClose();
            Writer.Dispose(false);

            // Cannot shutdown until after writer is closed because
            // writer has merged segment warmer that uses IS to run
            // searches, and that IS may be using this es!

            /*if (es != null)
             * {
             * es.shutdown();
             * es.awaitTermination(1, TimeUnit.SECONDS);
             * }*/

            TestUtil.CheckIndex(Dir);
            Dir.Dispose();
            System.IO.Directory.Delete(tempDir.FullName, true);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }
        }
Exemplo n.º 39
0
        static void Do()
        {
            //var directory = new SimpleFSDirectory(new DirectoryInfo(@"c:\temp\lucene"));
            using (var connection = new SqlConnection(@"MultipleActiveResultSets=True;Data Source=(localdb)\v11.0;Initial Catalog=TestLucene;Integrated Security=True;Connect Timeout=30;Encrypt=False;TrustServerCertificate=False"))
            {
                connection.Open();
                var directory = new SqlServerDirectory(connection, new Options() { SchemaName = "[search]" });

                for (int outer = 0; outer < 1000; outer++)
                {

                    IndexWriter indexWriter = null;
                    while (indexWriter == null)
                    {
                        try
                        {
                            indexWriter = new IndexWriter(directory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30),
                                !IndexReader.IndexExists(directory),
                                new Lucene.Net.Index.IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
                        }
                        catch (LockObtainFailedException)
                        {
                            Console.WriteLine("Lock is taken, waiting for timeout...");
                            Thread.Sleep(1000);
                        }
                    }
                ;
                    Console.WriteLine("IndexWriter lock obtained, this process has exclusive write access to index");
                    indexWriter.SetRAMBufferSizeMB(100.0);
                    indexWriter.SetInfoStream(new StreamWriter(Console.OpenStandardOutput()));
                    indexWriter.UseCompoundFile = false;

                    for (int iDoc = 0; iDoc < 1000; iDoc++)
                    {
                        if (iDoc % 10 == 0)
                            Console.WriteLine(iDoc);
                        Document doc = new Document();
                        doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), Field.Store.YES,
                            Field.Index.ANALYZED, Field.TermVector.NO));
                        doc.Add(new Field("Title", "dog " + GeneratePhrase(50), Field.Store.NO, Field.Index.ANALYZED,
                            Field.TermVector.NO));
                        doc.Add(new Field("Body", "dog " + GeneratePhrase(50), Field.Store.NO, Field.Index.ANALYZED,
                            Field.TermVector.NO));
                        indexWriter.AddDocument(doc);
                    }

                    Console.WriteLine("Total docs is {0}", indexWriter.NumDocs());

                    Console.Write("Flushing and disposing writer...");
                    indexWriter.Flush(true, true, true);
                    indexWriter.Dispose();
                }

                IndexSearcher searcher;

                using (new AutoStopWatch("Creating searcher"))
                {
                    searcher = new IndexSearcher(directory);
                }
                using (new AutoStopWatch("Count"))
                    Console.WriteLine("Number of docs: {0}", searcher.IndexReader.NumDocs());

                while (true)
                {
                    SearchForPhrase(searcher, "microsoft");
                    Thread.Sleep(1000);
                    //Console.WriteLine("Press a key to search again");
                    //Console.ReadKey();
                }
            }
        }
Exemplo n.º 40
0
        protected internal virtual void RunFlushByRam(int numThreads, double maxRamMB, bool ensureNotStalled)
        {
            int                    numDocumentsToIndex = 10 + AtLeast(30);
            AtomicInteger          numDocs             = new AtomicInteger(numDocumentsToIndex);
            Directory              dir         = NewDirectory();
            MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
            MockAnalyzer           analyzer    = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetFlushPolicy(flushPolicy);
            int numDWPT           = 1 + AtLeast(2);
            DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);

            iwc.SetIndexerThreadPool(threadPool);
            iwc.SetRAMBufferSizeMB(maxRamMB);
            iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            IndexWriter writer = new IndexWriter(dir, iwc);

            // The config above disables the doc-count and delete-term triggers, so only the RAM threshold may flush.
            flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
            Assert.IsFalse(flushPolicy.FlushOnDocCount());
            Assert.IsFalse(flushPolicy.FlushOnDeleteTerms());
            Assert.IsTrue(flushPolicy.FlushOnRAM());
            DocumentsWriter docsWriter = writer.DocsWriter;

            Assert.IsNotNull(docsWriter);
            DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;

            Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init");

            IndexThread[] threads = new IndexThread[numThreads];
            for (int x = 0; x < threads.Length; x++)
            {
                threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, false);
                threads[x].Start();
            }

            for (int x = 0; x < threads.Length; x++)
            {
                threads[x].Join();
            }
            long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);

            // Once every indexing thread has joined, nothing may be left pending and every document must be visible.
            Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due numThreads=" + numThreads);
            Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
            Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc());
            Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
            AssertActiveBytesAfter(flushControl);
            if (flushPolicy.HasMarkedPending)
            {
                Assert.IsTrue(maxRAMBytes < flushControl.PeakActiveBytes);
            }
            if (ensureNotStalled)
            {
                Assert.IsFalse(docsWriter.FlushControl.StallControl.WasStalled());
            }
            writer.Dispose();
            Assert.AreEqual(0, flushControl.ActiveBytes());
            dir.Dispose();
        }
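Stripped of the test scaffolding, the assertions above depend on the IndexWriterConfig settings that make RAM usage the only flush trigger. The following is a minimal sketch of that configuration, assuming the same Lucene.Net 4.x port, an existing Directory dir and Analyzer analyzer, and with TEST_VERSION_CURRENT standing in for whichever version constant the application targets:

            // Disable the doc-count and delete-term triggers so only the RAM threshold can force a flush.
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetRAMBufferSizeMB(16.0);                                        // flush when roughly 16 MB is buffered
            iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);        // never flush by document count
            iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); // never flush by buffered delete terms
            IndexWriter writer = new IndexWriter(dir, iwc);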
Exemplo n.º 41
0
		public virtual void  TestExpungeDeletes2()
		{
			Directory dir = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
			writer.SetMaxBufferedDocs(2);
			writer.SetMergeFactor(50);
			writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
			
			Document document = new Document();
			Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
			document.Add(storedField);
			Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
			document.Add(termVectorField);
			for (int i = 0; i < 98; i++)
				writer.AddDocument(document);
			writer.Close();
			
			IndexReader ir = IndexReader.Open(dir);
			Assert.AreEqual(98, ir.MaxDoc());
			Assert.AreEqual(98, ir.NumDocs());
			for (int i = 0; i < 98; i += 2)
				ir.DeleteDocument(i);
			Assert.AreEqual(49, ir.NumDocs());
			ir.Close();
			
			writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
			writer.SetMergeFactor(3);
			Assert.AreEqual(49, writer.NumDocs());
			writer.ExpungeDeletes();
			writer.Close();
			ir = IndexReader.Open(dir);
			Assert.AreEqual(49, ir.MaxDoc());
			Assert.AreEqual(49, ir.NumDocs());
			ir.Close();
			dir.Close();
		}
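The test above also illustrates how NumDocs() and MaxDoc() diverge once an index contains deletions and converge again after ExpungeDeletes(). A standalone sketch of the same idea against the 2.9-era API used above (RAMDirectory chosen only for brevity):

		Directory dir = new RAMDirectory();
		IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
		for (int i = 0; i < 10; i++)
		{
			Document doc = new Document();
			doc.Add(new Field("id", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
			writer.AddDocument(doc);
		}
		writer.Close();
		
		IndexReader reader = IndexReader.Open(dir);
		reader.DeleteDocument(0);   // mark one document as deleted
		reader.Close();
		
		writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
		// NumDocs() already excludes the committed deletion, MaxDoc() still counts the slot.
		Console.WriteLine("NumDocs={0}, MaxDoc={1}", writer.NumDocs(), writer.MaxDoc());   // 9, 10
		writer.ExpungeDeletes();    // rewrite the affected segments so deleted docs are physically removed
		Console.WriteLine("NumDocs={0}, MaxDoc={1}", writer.NumDocs(), writer.MaxDoc());   // 9, 9 once the merge completes
		writer.Close();
		dir.Close();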
        public virtual void ChangeIndexWithAdds(Random random, Directory dir, string origOldName)
        {
            // open writer
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.APPEND).SetMergePolicy(NewLogMergePolicy()));
            // add 10 docs
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, 35 + i);
            }

            // make sure writer sees right total -- writer seems not to know about deletes in .del?
            int expected;
            if (Compare(origOldName, "24") < 0)
            {
                expected = 44;
            }
            else
            {
                expected = 45;
            }
            Assert.AreEqual(expected, writer.NumDocs(), "wrong doc count");
            writer.Dispose();

            // make sure searching sees right # hits
            IndexReader reader = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);
            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Document d = searcher.IndexReader.Document(hits[0].Doc);
            assertEquals("wrong first document", "0", d.Get("id"));
            DoTestHits(hits, 44, searcher.IndexReader);
            reader.Dispose();

            // fully merge
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.APPEND).SetMergePolicy(NewLogMergePolicy()));
            writer.ForceMerge(1);
            writer.Dispose();

            reader = DirectoryReader.Open(dir);
            searcher = NewSearcher(reader);
            hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(44, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            DoTestHits(hits, 44, searcher.IndexReader);
            assertEquals("wrong first document", "0", d.Get("id"));
            reader.Dispose();
        }
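AddDoc above is a helper defined elsewhere in the test class. A minimal hypothetical stand-in, consistent with the content and id fields the assertions query (the real helper adds several more fields):

        // Hypothetical stand-in for the AddDoc helper used above; the real test adds many more fields.
        private void AddDoc(IndexWriter writer, int id)
        {
            Document doc = new Document();
            doc.Add(NewTextField("content", "aaa", Field.Store.NO));
            doc.Add(NewStringField("id", Convert.ToString(id), Field.Store.YES));
            writer.AddDocument(doc);
        }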
Exemplo n.º 43
0
        public void WriteIndex()
        {
            CreateLuceneIndexFolder();
            IList<GeoName> canadianDestinations = GetDestinations("CA");

            FSDirectory indexDirectory = FSDirectory.Open(new DirectoryInfo(_luceneIndexPath));

            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
            var writer = new IndexWriter(indexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

            var stopwatch = new Stopwatch();
            stopwatch.Start();

            foreach (GeoName canadianDestination in canadianDestinations)
            {
                var doc = new Document();

                doc.Add(new Field("Asciiname",
                                  canadianDestination.Asciiname,
                                  Field.Store.YES,
                                  Field.Index.ANALYZED));

                doc.Add(new Field("FeatureClass",
                                  canadianDestination.FeatureClass,
                                  Field.Store.YES,
                                  Field.Index.ANALYZED));

                doc.Add(new Field("FeatureCode",
                                  canadianDestination.FeatureCode,
                                  Field.Store.YES,
                                  Field.Index.ANALYZED));

                doc.Add(new Field("Latitude",
                                  NumericUtils.DoubleToPrefixCoded(canadianDestination.Latitude),
                                  Field.Store.YES,
                                  Field.Index.NOT_ANALYZED));

                doc.Add(new Field("Longitude",
                                  NumericUtils.DoubleToPrefixCoded(canadianDestination.Longitude),
                                  Field.Store.YES,
                                  Field.Index.NOT_ANALYZED));

                doc.Add(new Field("Timezone",
                                  canadianDestination.Timezone,
                                  Field.Store.YES,
                                  Field.Index.NOT_ANALYZED));

                doc.Add(new Field("LastModified",
                                  DateTools.DateToString(DateTime.Now, DateTools.Resolution.HOUR),
                                  Field.Store.YES,
                                  Field.Index.NOT_ANALYZED));

                int ctpsize = _ctps.Count;
                // One box id per cartesian tier; these fields are what a cartesian shape filter prunes on at query time.
                for (int j = 0; j < ctpsize; j++)
                {
                    CartesianTierPlotter ctp = _ctps[j];
                    double boxId = ctp.GetTierBoxId(canadianDestination.Latitude, canadianDestination.Longitude);

                    doc.Add(new Field(ctp.GetTierFieldName(),
                                      NumericUtils.DoubleToPrefixCoded(boxId),
                                      Field.Store.YES,
                                      Field.Index.NOT_ANALYZED_NO_NORMS));
                }

                writer.AddDocument(doc);
            }

            writer.Optimize();
            writer.Commit();

            stopwatch.Stop();
            Debug.WriteLine("Duration: " + stopwatch.Elapsed.TotalSeconds + " seconds");
            Debug.WriteLine("Number of indexed documents: " + writer.NumDocs());
        }
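The _ctps list used above holds one CartesianTierPlotter per tier and is built elsewhere in the class. The following is a sketch of a typical setup, assuming the contrib spatial API this example targets; MinKms and MaxKms are hypothetical constants for the smallest and largest search radius the index should support:

        // Hypothetical setup for _ctps: one plotter per cartesian tier between the best-fit
        // tiers for the largest and smallest search radius (MinKms/MaxKms are assumed constants).
        private void InitCartesianTierPlotters()
        {
            IProjector projector = new SinusoidalProjector();
            var ctp = new CartesianTierPlotter(0, projector, CartesianTierPlotter.DefaltFieldPrefix);
            int startTier = ctp.BestFit(MaxKms);
            int endTier = ctp.BestFit(MinKms);

            for (int tier = startTier; tier <= endTier; tier++)
            {
                _ctps.Add(new CartesianTierPlotter(tier, projector, CartesianTierPlotter.DefaltFieldPrefix));
            }
        }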