/// <summary> Asserts the segment layout this test expects of <c>writer</c>:
/// the number of buffered documents is below maxBufferedDocs; walking the
/// segments from the last to the first, every doc count is above the lower
/// bound of the current size level; a level holds fewer than mergeFactor
/// segments (only checked while upperBound * mergeFactor does not exceed
/// maxMergeDocs); and the directory lists exactly one ".cfs" file per segment. </summary>
/// <param name="writer"> the writer whose settings, segments and directory are inspected </param>
private void  CheckInvariants(IndexWriter writer)
        {
            // NOTE(review): presumably blocks until concurrent merges have finished - confirm in _TestUtil
            _TestUtil.SyncConcurrentMerges(writer);
            int maxBufferedDocs = writer.GetMaxBufferedDocs();
            int mergeFactor     = writer.GetMergeFactor();
            int maxMergeDocs    = writer.GetMaxMergeDocs();

            int ramSegmentCount = writer.GetNumBufferedDocuments();

            Assert.IsTrue(ramSegmentCount < maxBufferedDocs);

            // Current size level is (lowerBound, upperBound]; numSegments counts the
            // segments seen so far that fall into it.
            int lowerBound  = -1;
            int upperBound  = maxBufferedDocs;
            int numSegments = 0;

            int segmentCount = writer.GetSegmentCount();

            for (int i = segmentCount - 1; i >= 0; i--)
            {
                int docCount = writer.GetDocCount(i);
                Assert.IsTrue(docCount > lowerBound);

                if (docCount <= upperBound)
                {
                    numSegments++;
                }
                else
                {
                    // Leaving the current level: check its segment count first.
                    if (upperBound * mergeFactor <= maxMergeDocs)
                    {
                        Assert.IsTrue(numSegments < mergeFactor);
                    }

                    // Grow the level by mergeFactor until it contains docCount.
                    do
                    {
                        lowerBound  = upperBound;
                        upperBound *= mergeFactor;
                    }while (docCount > upperBound);
                    numSegments = 1;
                }
            }
            // The last level visited has not been checked inside the loop.
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor);
            }

            System.String[] files           = writer.GetDirectory().ListAll();
            int             segmentCfsCount = 0;

            foreach (System.String file in files)
            {
                // File extensions are identifiers, not linguistic text: compare ordinally
                // so the result does not depend on the current culture.
                if (file.EndsWith(".cfs", System.StringComparison.Ordinal))
                {
                    segmentCfsCount++;
                }
            }
            Assert.AreEqual(segmentCount, segmentCfsCount);
        }
예제 #2
0
 /// <summary> Returns the merge factor reported by the underlying index writer.
 /// While holding the lock on <c>directory</c>, calls <c>AssureOpen()</c> and
 /// <c>CreateIndexWriter()</c> before reading the value. </summary>
 /// <throws>  IOException </throws>
 /// <seealso cref="IndexModifier.SetMergeFactor(int)">
 /// </seealso>
 public virtual int GetMergeFactor()
 {
     lock (directory)
     {
         AssureOpen();
         CreateIndexWriter();

         int currentMergeFactor = indexWriter.GetMergeFactor();
         return currentMergeFactor;
     }
 }
        /// <summary> Asserts the segment layout this test expects of <c>writer</c>
        /// once <c>WaitForMerges()</c> has returned: the buffered document count is
        /// below maxBufferedDocs; scanning segments from last to first, no doc count
        /// is at or below the floor of the current size level; and a level holds
        /// fewer than mergeFactor segments whenever ceiling * mergeFactor does not
        /// exceed maxMergeDocs. </summary>
        /// <param name="writer"> the writer whose settings and segments are inspected </param>
        private void  CheckInvariants(IndexWriter writer)
        {
            writer.WaitForMerges();

            int bufferLimit = writer.GetMaxBufferedDocs();
            int factor      = writer.GetMergeFactor();
            int mergeLimit  = writer.GetMaxMergeDocs();

            // The buffered document count must stay below maxBufferedDocs.
            int buffered = writer.GetNumBufferedDocuments();
            Assert.IsTrue(buffered < bufferLimit);

            // The current size level is (floor, ceiling]; inLevel counts its segments.
            int floor   = -1;
            int ceiling = bufferLimit;
            int inLevel = 0;

            int position = writer.GetSegmentCount();
            while (--position >= 0)
            {
                int size = writer.GetDocCount(position);
                Assert.IsTrue(size > floor);

                if (size <= ceiling)
                {
                    inLevel++;
                    continue;
                }

                // This segment is too big for the current level: verify the level
                // being left, then widen the level until the segment fits.
                if (ceiling * factor <= mergeLimit)
                {
                    Assert.IsTrue(inLevel < factor);
                }

                while (true)
                {
                    floor    = ceiling;
                    ceiling *= factor;
                    if (size <= ceiling)
                    {
                        break;
                    }
                }
                inLevel = 1;
            }

            // Verify the level that was still open when the scan ended.
            if (ceiling * factor <= mergeLimit)
            {
                Assert.IsTrue(inLevel < factor);
            }
        }
		/// <summary> Asserts the segment layout this test expects of <c>writer</c>:
		/// the number of buffered documents is below maxBufferedDocs; walking the
		/// segments from the last to the first, every doc count is above the lower
		/// bound of the current size level; a level holds fewer than mergeFactor
		/// segments (only checked while upperBound * mergeFactor does not exceed
		/// maxMergeDocs); and the directory lists exactly one ".cfs" file per segment. </summary>
		/// <param name="writer"> the writer whose settings, segments and directory are inspected </param>
		private void  CheckInvariants(IndexWriter writer)
		{
			// NOTE(review): presumably blocks until concurrent merges have finished - confirm in _TestUtil
			_TestUtil.SyncConcurrentMerges(writer);
			int maxBufferedDocs = writer.GetMaxBufferedDocs();
			int mergeFactor = writer.GetMergeFactor();
			int maxMergeDocs = writer.GetMaxMergeDocs();
			
			int ramSegmentCount = writer.GetNumBufferedDocuments();
			Assert.IsTrue(ramSegmentCount < maxBufferedDocs);
			
			// Current size level is (lowerBound, upperBound]; numSegments counts the
			// segments seen so far that fall into it.
			int lowerBound = - 1;
			int upperBound = maxBufferedDocs;
			int numSegments = 0;
			
			int segmentCount = writer.GetSegmentCount();
			for (int i = segmentCount - 1; i >= 0; i--)
			{
				int docCount = writer.GetDocCount(i);
				Assert.IsTrue(docCount > lowerBound);
				
				if (docCount <= upperBound)
				{
					numSegments++;
				}
				else
				{
					// Leaving the current level: check its segment count first.
					if (upperBound * mergeFactor <= maxMergeDocs)
					{
						Assert.IsTrue(numSegments < mergeFactor);
					}
					
					// Grow the level by mergeFactor until it contains docCount.
					do 
					{
						lowerBound = upperBound;
						upperBound *= mergeFactor;
					}
					while (docCount > upperBound);
					numSegments = 1;
				}
			}
			// The last level visited has not been checked inside the loop.
			if (upperBound * mergeFactor <= maxMergeDocs)
			{
				Assert.IsTrue(numSegments < mergeFactor);
			}
			
			System.String[] files = writer.GetDirectory().ListAll();
			int segmentCfsCount = 0;
			foreach (System.String file in files)
			{
				// File extensions are identifiers, not linguistic text: compare ordinally
				// so the result does not depend on the current culture.
				if (file.EndsWith(".cfs", System.StringComparison.Ordinal))
				{
					segmentCfsCount++;
				}
			}
			Assert.AreEqual(segmentCount, segmentCfsCount);
		}
예제 #5
0
		/// <summary> Checks the field names returned by IndexReader.GetFieldNames for
		/// each FieldOption queried below, first on an index built from one call to
		/// AddDocumentWithFields and then after three further batches of documents
		/// have been added. </summary>
		/// <throws>  Exception on error </throws>
		public virtual void  TestGetFieldNames()
		{
			RAMDirectory dir = new MockRAMDirectory();
			
			// build the initial index
			IndexWriter indexer = new IndexWriter(dir, new StandardAnalyzer(), true);
			AddDocumentWithFields(indexer);
			indexer.Close();
			
			// the four base fields must be reported
			IndexReader indexReader = IndexReader.Open(dir);
			System.Collections.ICollection names = indexReader.GetFieldNames(IndexReader.FieldOption.ALL);
			foreach (System.String expected in new System.String[] { "keyword", "text", "unindexed", "unstored" })
			{
				Assert.IsTrue(CollectionContains(names, expected));
			}
			indexReader.Close();
			
			// reopen the index for appending (create flag is false)
			indexer = new IndexWriter(dir, new StandardAnalyzer(), false);
			
			// add enough documents that several segments are hopefully produced
			int added = 0;
			while (added < 5 * indexer.GetMergeFactor())
			{
				AddDocumentWithFields(indexer);
				added++;
			}
			// documents with a second set of field names
			added = 0;
			while (added < 5 * indexer.GetMergeFactor())
			{
				AddDocumentWithDifferentFields(indexer);
				added++;
			}
			// documents with term vector fields
			added = 0;
			while (added < 5 * indexer.GetMergeFactor())
			{
				AddDocumentWithTermVectorFields(indexer);
				added++;
			}
			
			indexer.Close();
			
			// ALL: thirteen names, listed below
			indexReader = IndexReader.Open(dir);
			names = indexReader.GetFieldNames(IndexReader.FieldOption.ALL);
			Assert.AreEqual(13, names.Count);
			foreach (System.String expected in new System.String[] { "keyword", "text", "unindexed", "unstored", "keyword2", "text2", "unindexed2", "unstored2", "tvnot", "termvector", "tvposition", "tvoffset", "tvpositionoffset" })
			{
				Assert.IsTrue(CollectionContains(names, expected));
			}
			
			// INDEXED: eleven names, listed below
			names = indexReader.GetFieldNames(IndexReader.FieldOption.INDEXED);
			Assert.AreEqual(11, names.Count);
			foreach (System.String expected in new System.String[] { "keyword", "text", "unstored", "keyword2", "text2", "unstored2", "tvnot", "termvector", "tvposition", "tvoffset", "tvpositionoffset" })
			{
				Assert.IsTrue(CollectionContains(names, expected));
			}
			
			// UNINDEXED: two names, listed below
			names = indexReader.GetFieldNames(IndexReader.FieldOption.UNINDEXED);
			Assert.AreEqual(2, names.Count);
			foreach (System.String expected in new System.String[] { "unindexed", "unindexed2" })
			{
				Assert.IsTrue(CollectionContains(names, expected));
			}
			
			// each term vector option below is expected to report exactly one field
			names = indexReader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR);
			Assert.AreEqual(1, names.Count);
			Assert.IsTrue(CollectionContains(names, "termvector"));
			
			names = indexReader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION);
			Assert.AreEqual(1, names.Count);
			Assert.IsTrue(CollectionContains(names, "tvposition"));
			
			names = indexReader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET);
			Assert.AreEqual(1, names.Count);
			Assert.IsTrue(CollectionContains(names, "tvoffset"));
			
			names = indexReader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET);
			Assert.AreEqual(1, names.Count);
			Assert.IsTrue(CollectionContains(names, "tvpositionoffset"));
			
			indexReader.Close();
			dir.Close();
		}
예제 #6
0
		/// <summary> Indexes documents that carry five tokenized fields, four of them
		/// with a term vector option other than NO, then reads the term vectors of
		/// document 0 through a FieldSortedTermVectorMapper and checks that the
		/// mapper reports four fields and that every entry under "termvector" is
		/// non-null. </summary>
		public virtual void  TestTermVectors()
		{
			RAMDirectory d = new MockRAMDirectory();
			// set up writer
			IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(), true);
			// want to get some more segments here
			// new termvector fields
			for (int i = 0; i < 5 * writer.GetMergeFactor(); i++)
			{
				Document doc = new Document();
				doc.Add(new Field("tvnot", "one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
				doc.Add(new Field("termvector", "one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
				doc.Add(new Field("tvoffset", "one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_OFFSETS));
				doc.Add(new Field("tvposition", "one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS));
				doc.Add(new Field("tvpositionoffset", "one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
				
				writer.AddDocument(doc);
			}
			writer.Close();
			IndexReader reader = IndexReader.Open(d);
			FieldSortedTermVectorMapper mapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
			reader.GetTermFreqVector(0, mapper);
			System.Collections.IDictionary map = mapper.GetFieldToTerms();
			Assert.IsTrue(map != null, "map is null and it shouldn't be");
			// "tvnot" is added with TermVector.NO, so four of the five fields are expected here
			Assert.IsTrue(map.Count == 4, "map Size: " + map.Count + " is not: " + 4);
			System.Collections.IDictionary set_Renamed = (System.Collections.IDictionary) map["termvector"];
			// Fail with a clear message instead of a NullReferenceException below.
			Assert.IsTrue(set_Renamed != null, "no entries mapped for field 'termvector'");
			for (System.Collections.IEnumerator iterator = set_Renamed.Keys.GetEnumerator(); iterator.MoveNext(); )
			{
				TermVectorEntry entry = (TermVectorEntry) iterator.Current;
				Assert.IsTrue(entry != null, "entry is null and it shouldn't be");
				System.Console.Out.WriteLine("Entry: " + entry);
			}
			// Release the reader before the directory, as TestGetFieldNames does.
			reader.Close();
			d.Close();
		}
		/// <summary> Asserts the segment layout this test expects of <c>writer</c>
		/// once <c>WaitForMerges()</c> has returned: the buffered document count is
		/// below maxBufferedDocs; scanning segments from last to first, each doc
		/// count exceeds the minimum of the current size level; and a level holds
		/// fewer than mergeFactor segments whenever levelMax * mergeFactor does not
		/// exceed maxMergeDocs. </summary>
		/// <param name="writer"> the writer whose settings and segments are inspected </param>
		private void  CheckInvariants(IndexWriter writer)
		{
			writer.WaitForMerges();
			
			int maxBuffered = writer.GetMaxBufferedDocs();
			int mergeFactor = writer.GetMergeFactor();
			int maxMerge = writer.GetMaxMergeDocs();
			
			int bufferedNow = writer.GetNumBufferedDocuments();
			Assert.IsTrue(bufferedNow < maxBuffered);
			
			// The current size level is (levelMin, levelMax]; levelCount counts its segments.
			int levelMin = - 1;
			int levelMax = maxBuffered;
			int levelCount = 0;
			
			int total = writer.GetSegmentCount();
			for (int seg = total; seg-- > 0; )
			{
				int docs = writer.GetDocCount(seg);
				Assert.IsTrue(docs > levelMin);
				
				if (docs > levelMax)
				{
					// Check the level being left before moving to a larger one.
					if (levelMax * mergeFactor <= maxMerge)
					{
						Assert.IsTrue(levelCount < mergeFactor);
					}
					
					// Multiply the level up until this segment fits into it.
					do 
					{
						levelMin = levelMax;
						levelMax = levelMax * mergeFactor;
					}
					while (docs > levelMax);
					levelCount = 1;
				}
				else
				{
					levelCount++;
				}
			}
			
			// Check the level that was still open when the scan finished.
			if (levelMax * mergeFactor <= maxMerge)
			{
				Assert.IsTrue(levelCount < mergeFactor);
			}
		}