GetDocCount() public method

public GetDocCount ( int i ) : int
i int
return int
        public virtual void  TestMergeAfterCopy()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexReader reader = IndexReader.Open(aux);

            for (int i = 0; i < 20; i++)
            {
                reader.DeleteDocument(i);
            }
            Assert.AreEqual(10, reader.NumDocs());
            reader.Close();

            IndexWriter writer = NewWriter(dir, false);

            writer.SetMaxBufferedDocs(4);
            writer.SetMergeFactor(4);

            writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) });
            Assert.AreEqual(1020, writer.DocCount());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1020);
        }
        private void  CheckInvariants(IndexWriter writer)
        {
            _TestUtil.SyncConcurrentMerges(writer);
            int maxBufferedDocs = writer.GetMaxBufferedDocs();
            int mergeFactor     = writer.GetMergeFactor();
            int maxMergeDocs    = writer.GetMaxMergeDocs();

            int ramSegmentCount = writer.GetNumBufferedDocuments();

            Assert.IsTrue(ramSegmentCount < maxBufferedDocs);

            int lowerBound  = -1;
            int upperBound  = maxBufferedDocs;
            int numSegments = 0;

            int segmentCount = writer.GetSegmentCount();

            for (int i = segmentCount - 1; i >= 0; i--)
            {
                int docCount = writer.GetDocCount(i);
                Assert.IsTrue(docCount > lowerBound);

                if (docCount <= upperBound)
                {
                    numSegments++;
                }
                else
                {
                    if (upperBound * mergeFactor <= maxMergeDocs)
                    {
                        Assert.IsTrue(numSegments < mergeFactor);
                    }

                    do
                    {
                        lowerBound  = upperBound;
                        upperBound *= mergeFactor;
                    }while (docCount > upperBound);
                    numSegments = 1;
                }
            }
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor);
            }

            System.String[] files           = writer.GetDirectory().ListAll();
            int             segmentCfsCount = 0;

            for (int i = 0; i < files.Length; i++)
            {
                if (files[i].EndsWith(".cfs"))
                {
                    segmentCfsCount++;
                }
            }
            Assert.AreEqual(segmentCount, segmentCfsCount);
        }
        public virtual void  TestMoreMerges()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux  = new RAMDirectory();
            Directory aux2 = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexWriter writer = NewWriter(aux2, true);

            writer.SetMaxBufferedDocs(100);
            writer.SetMergeFactor(10);
            writer.AddIndexesNoOptimize(new Directory[] { aux });
            Assert.AreEqual(30, writer.DocCount());
            Assert.AreEqual(3, writer.GetSegmentCount());
            writer.Close();

            IndexReader reader = IndexReader.Open(aux);

            for (int i = 0; i < 27; i++)
            {
                reader.DeleteDocument(i);
            }
            Assert.AreEqual(3, reader.NumDocs());
            reader.Close();

            reader = IndexReader.Open(aux2);
            for (int i = 0; i < 8; i++)
            {
                reader.DeleteDocument(i);
            }
            Assert.AreEqual(22, reader.NumDocs());
            reader.Close();

            writer = NewWriter(dir, false);
            writer.SetMaxBufferedDocs(6);
            writer.SetMergeFactor(4);

            writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 });
            Assert.AreEqual(1025, writer.DocCount());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1025);
        }
Beispiel #4
0
        private void CheckInvariants(IndexWriter writer)
        {
            writer.WaitForMerges();
            int maxBufferedDocs = writer.Config.MaxBufferedDocs;
            int mergeFactor     = ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor;
            int maxMergeDocs    = ((LogMergePolicy)writer.Config.MergePolicy).MaxMergeDocs;

            int ramSegmentCount = writer.NumBufferedDocuments;

            Assert.IsTrue(ramSegmentCount < maxBufferedDocs);

            int lowerBound  = -1;
            int upperBound  = maxBufferedDocs;
            int numSegments = 0;

            int segmentCount = writer.SegmentCount;

            for (int i = segmentCount - 1; i >= 0; i--)
            {
                int docCount = writer.GetDocCount(i);
                Assert.IsTrue(docCount > lowerBound, "docCount=" + docCount + " lowerBound=" + lowerBound + " upperBound=" + upperBound + " i=" + i + " segmentCount=" + segmentCount + " index=" + writer.SegString() + " config=" + writer.Config);

                if (docCount <= upperBound)
                {
                    numSegments++;
                }
                else
                {
                    if (upperBound * mergeFactor <= maxMergeDocs)
                    {
                        Assert.IsTrue(numSegments < mergeFactor, "maxMergeDocs=" + maxMergeDocs + "; numSegments=" + numSegments + "; upperBound=" + upperBound + "; mergeFactor=" + mergeFactor + "; segs=" + writer.SegString() + " config=" + writer.Config);
                    }

                    do
                    {
                        lowerBound  = upperBound;
                        upperBound *= mergeFactor;
                    } while (docCount > upperBound);
                    numSegments = 1;
                }
            }
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor);
            }
        }
        private void  CheckInvariants(IndexWriter writer)
        {
            writer.WaitForMerges();
            int maxBufferedDocs = writer.GetMaxBufferedDocs();
            int mergeFactor     = writer.GetMergeFactor();
            int maxMergeDocs    = writer.GetMaxMergeDocs();

            int ramSegmentCount = writer.GetNumBufferedDocuments();

            Assert.IsTrue(ramSegmentCount < maxBufferedDocs);

            int lowerBound  = -1;
            int upperBound  = maxBufferedDocs;
            int numSegments = 0;

            int segmentCount = writer.GetSegmentCount();

            for (int i = segmentCount - 1; i >= 0; i--)
            {
                int docCount = writer.GetDocCount(i);
                Assert.IsTrue(docCount > lowerBound);

                if (docCount <= upperBound)
                {
                    numSegments++;
                }
                else
                {
                    if (upperBound * mergeFactor <= maxMergeDocs)
                    {
                        Assert.IsTrue(numSegments < mergeFactor);
                    }

                    do
                    {
                        lowerBound  = upperBound;
                        upperBound *= mergeFactor;
                    }while (docCount > upperBound);
                    numSegments = 1;
                }
            }
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor);
            }
        }
        public virtual void  TestNoMergeAfterCopy()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexWriter writer = NewWriter(dir, false);

            writer.SetMaxBufferedDocs(10);
            writer.SetMergeFactor(4);

            writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) });
            Assert.AreEqual(1060, writer.DocCount());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1060);
        }
Beispiel #7
0
    /// <summary>
    /// This method indexes the content that is sent across to it. Each piece of content (or "document")
    /// that is indexed has to have a unique identifier (so that the caller can take action based on the
    /// document id). Therefore, this method accepts key-value pairs in the form of a dictionary. The key
    /// is a ulong which uniquely identifies the string to be indexed. The string itself is the value
    /// within the dictionary for that key. Be aware that stop words (like the, this, at, etc.) are _not_
    /// indexed.
    /// </summary>
    /// <param name="txtIdPairToBeIndexed">A dictionary of key-value pairs that are sent by the caller
    /// to uniquely identify each string that is to be indexed.</param>
    /// <returns>The number of documents indexed.</returns>
    public int Index (Dictionary<long, string> txtIdPairToBeIndexed) {

		using (Directory directory = FSDirectory.Open(_indexDir))
		using (Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
		using (IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
		using (IndexReader reader = writer.GetReader())
		{

			//writer.DeleteAll();


			Dictionary<long, string>.KeyCollection keys = txtIdPairToBeIndexed.Keys;

			foreach (long id in keys)
			{
				char[] delimiter = { ';' };
				string[] text = txtIdPairToBeIndexed[id].Split(delimiter);
				Document document = new Document();

				Field title = new	Field("title", text[0], Field.Store.YES, Field.Index.NO);
				Field type = new Field("type", text[1], Field.Store.YES, Field.Index.NO);
				Field idField = new Field("date", (id).ToString(), Field.Store.YES, Field.Index.ANALYZED);

				document.Add(title);
				document.Add(type);
				document.Add(idField);

				writer.AddDocument(document);
			}

			int numIndexed = writer.GetDocCount(0);//TODO check number
			writer.Optimize();
			writer.Flush(true,true,true);

			return numIndexed;
		}

		}
Beispiel #8
0
        public virtual void  TestNoCopySegments()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexWriter writer = NewWriter(dir, false);

            writer.SetMaxBufferedDocs(9);
            writer.MergeFactor = 4;
            AddDocs(writer, 2);

            writer.AddIndexesNoOptimize(new Directory[] { aux });
            Assert.AreEqual(1032, writer.MaxDoc());
            Assert.AreEqual(2, writer.GetSegmentCount());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1032);
        }
 private void  CheckInvariants(IndexWriter writer)
 {
     writer.WaitForMerges();
     int maxBufferedDocs = writer.GetMaxBufferedDocs();
     int mergeFactor = writer.MergeFactor;
     int maxMergeDocs = writer.MaxMergeDocs;
     
     int ramSegmentCount = writer.GetNumBufferedDocuments();
     Assert.IsTrue(ramSegmentCount < maxBufferedDocs);
     
     int lowerBound = - 1;
     int upperBound = maxBufferedDocs;
     int numSegments = 0;
     
     int segmentCount = writer.GetSegmentCount();
     for (int i = segmentCount - 1; i >= 0; i--)
     {
         int docCount = writer.GetDocCount(i);
         Assert.IsTrue(docCount > lowerBound);
         
         if (docCount <= upperBound)
         {
             numSegments++;
         }
         else
         {
             if (upperBound * mergeFactor <= maxMergeDocs)
             {
                 Assert.IsTrue(numSegments < mergeFactor, "maxMergeDocs=" + maxMergeDocs + "; numSegments=" + numSegments + "; upperBound=" + upperBound + "; mergeFactor=" + mergeFactor);
             }
             
             do 
             {
                 lowerBound = upperBound;
                 upperBound *= mergeFactor;
             }
             while (docCount > upperBound);
             numSegments = 1;
         }
     }
     if (upperBound * mergeFactor <= maxMergeDocs)
     {
         Assert.IsTrue(numSegments < mergeFactor);
     }
 }
        public virtual void TestMoreMerges()
        {
            // main directory
            Directory dir = NewDirectory();
            // auxiliary directory
            Directory aux = NewDirectory();
            Directory aux2 = NewDirectory();

            SetUpDirs(dir, aux, true);

            IndexWriter writer = NewWriter(aux2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(100).SetMergePolicy(NewLogMergePolicy(10)));
            writer.AddIndexes(aux);
            Assert.AreEqual(30, writer.MaxDoc);
            Assert.AreEqual(3, writer.SegmentCount);
            writer.Dispose();

            IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
            writer = new IndexWriter(aux, dontMergeConfig);
            for (int i = 0; i < 27; i++)
            {
                writer.DeleteDocuments(new Term("id", "" + i));
            }
            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(aux);
            Assert.AreEqual(3, reader.NumDocs);
            reader.Dispose();

            dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
            writer = new IndexWriter(aux2, dontMergeConfig);
            for (int i = 0; i < 8; i++)
            {
                writer.DeleteDocuments(new Term("id", "" + i));
            }
            writer.Dispose();
            reader = DirectoryReader.Open(aux2);
            Assert.AreEqual(22, reader.NumDocs);
            reader.Dispose();

            writer = NewWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND).SetMaxBufferedDocs(6).SetMergePolicy(NewLogMergePolicy(4)));

            writer.AddIndexes(aux, aux2);
            Assert.AreEqual(1040, writer.MaxDoc);
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Dispose();
            dir.Dispose();
            aux.Dispose();
            aux2.Dispose();
        }
        public virtual void TestMergeAfterCopy()
        {
            // main directory
            Directory dir = NewDirectory();
            // auxiliary directory
            Directory aux = NewDirectory();

            SetUpDirs(dir, aux, true);

            IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
            IndexWriter writer = new IndexWriter(aux, dontMergeConfig);
            for (int i = 0; i < 20; i++)
            {
                writer.DeleteDocuments(new Term("id", "" + i));
            }
            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(aux);
            Assert.AreEqual(10, reader.NumDocs);
            reader.Dispose();

            writer = NewWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND).SetMaxBufferedDocs(4).SetMergePolicy(NewLogMergePolicy(4)));

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now addIndexes");
            }
            writer.AddIndexes(aux, new MockDirectoryWrapper(Random(), new RAMDirectory(aux, NewIOContext(Random()))));
            Assert.AreEqual(1020, writer.MaxDoc);
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Dispose();
            dir.Dispose();
            aux.Dispose();
        }
		private void  CheckInvariants(IndexWriter writer)
		{
			_TestUtil.SyncConcurrentMerges(writer);
			int maxBufferedDocs = writer.GetMaxBufferedDocs();
			int mergeFactor = writer.GetMergeFactor();
			int maxMergeDocs = writer.GetMaxMergeDocs();
			
			int ramSegmentCount = writer.GetNumBufferedDocuments();
			Assert.IsTrue(ramSegmentCount < maxBufferedDocs);
			
			int lowerBound = - 1;
			int upperBound = maxBufferedDocs;
			int numSegments = 0;
			
			int segmentCount = writer.GetSegmentCount();
			for (int i = segmentCount - 1; i >= 0; i--)
			{
				int docCount = writer.GetDocCount(i);
				Assert.IsTrue(docCount > lowerBound);
				
				if (docCount <= upperBound)
				{
					numSegments++;
				}
				else
				{
					if (upperBound * mergeFactor <= maxMergeDocs)
					{
						Assert.IsTrue(numSegments < mergeFactor);
					}
					
					do 
					{
						lowerBound = upperBound;
						upperBound *= mergeFactor;
					}
					while (docCount > upperBound);
					numSegments = 1;
				}
			}
			if (upperBound * mergeFactor <= maxMergeDocs)
			{
				Assert.IsTrue(numSegments < mergeFactor);
			}
			
			System.String[] files = writer.GetDirectory().ListAll();
			int segmentCfsCount = 0;
			for (int i = 0; i < files.Length; i++)
			{
				if (files[i].EndsWith(".cfs"))
				{
					segmentCfsCount++;
				}
			}
			Assert.AreEqual(segmentCount, segmentCfsCount);
		}
		private void  PrintSegmentDocCounts(IndexWriter writer)
		{
			int segmentCount = writer.GetSegmentCount();
			System.Console.Out.WriteLine("" + segmentCount + " segments total");
			for (int i = 0; i < segmentCount; i++)
			{
				System.Console.Out.WriteLine("  segment " + i + " has " + writer.GetDocCount(i) + " docs");
			}
		}
        private void CheckInvariants(IndexWriter writer)
        {
            writer.WaitForMerges();
            int maxBufferedDocs = writer.Config.MaxBufferedDocs;
            int mergeFactor = ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor;
            int maxMergeDocs = ((LogMergePolicy)writer.Config.MergePolicy).MaxMergeDocs;

            int ramSegmentCount = writer.NumBufferedDocuments;
            Assert.IsTrue(ramSegmentCount < maxBufferedDocs);

            int lowerBound = -1;
            int upperBound = maxBufferedDocs;
            int numSegments = 0;

            int segmentCount = writer.SegmentCount;
            for (int i = segmentCount - 1; i >= 0; i--)
            {
                int docCount = writer.GetDocCount(i);
                Assert.IsTrue(docCount > lowerBound, "docCount=" + docCount + " lowerBound=" + lowerBound + " upperBound=" + upperBound + " i=" + i + " segmentCount=" + segmentCount + " index=" + writer.SegString() + " config=" + writer.Config);

                if (docCount <= upperBound)
                {
                    numSegments++;
                }
                else
                {
                    if (upperBound * mergeFactor <= maxMergeDocs)
                    {
                        Assert.IsTrue(numSegments < mergeFactor, "maxMergeDocs=" + maxMergeDocs + "; numSegments=" + numSegments + "; upperBound=" + upperBound + "; mergeFactor=" + mergeFactor + "; segs=" + writer.SegString() + " config=" + writer.Config);
                    }

                    do
                    {
                        lowerBound = upperBound;
                        upperBound *= mergeFactor;
                    } while (docCount > upperBound);
                    numSegments = 1;
                }
            }
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor);
            }
        }