private void CheckInvariants(IndexWriter writer)
{
    _TestUtil.SyncConcurrentMerges(writer);

    int maxBufferedDocs = writer.GetMaxBufferedDocs();
    int mergeFactor = writer.GetMergeFactor();
    int maxMergeDocs = writer.GetMaxMergeDocs();

    // No more than maxBufferedDocs documents may still be buffered in RAM.
    int ramSegmentCount = writer.GetNumBufferedDocuments();
    Assert.IsTrue(ramSegmentCount < maxBufferedDocs);

    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int numSegments = 0;

    // Walk the segments from newest to oldest, grouping them into levels
    // (lowerBound, upperBound] that grow by mergeFactor; each level may hold
    // fewer than mergeFactor segments whenever a merge at that level would
    // still stay within maxMergeDocs.
    int segmentCount = writer.GetSegmentCount();
    for (int i = segmentCount - 1; i >= 0; i--)
    {
        int docCount = writer.GetDocCount(i);
        Assert.IsTrue(docCount > lowerBound);

        if (docCount <= upperBound)
        {
            numSegments++;
        }
        else
        {
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor);
            }

            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            }
            while (docCount > upperBound);

            numSegments = 1;
        }
    }
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(numSegments < mergeFactor);
    }

    // Every segment should be stored in compound-file (.cfs) format.
    System.String[] files = writer.GetDirectory().ListAll();
    int segmentCfsCount = 0;
    for (int i = 0; i < files.Length; i++)
    {
        if (files[i].EndsWith(".cfs"))
        {
            segmentCfsCount++;
        }
    }
    Assert.AreEqual(segmentCount, segmentCfsCount);
}
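The test fixture that drives this helper is not shown in this section. As a rough sketch (an assumption, not the actual test), a merge-policy test would typically tune the writer's buffering and merge settings, add documents one at a time, and re-check the invariants after every addition:

// Hypothetical driver, for illustration only: the method name, analyzer choice,
// and document contents are assumptions and are not taken from this file.
private void ExampleDriveCheckInvariants(Directory dir)
{
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.SetMaxBufferedDocs(10); // flush to a new segment after 10 buffered docs
    writer.SetMergeFactor(10);     // merge once 10 segments pile up at one level

    for (int i = 0; i < 100; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
        CheckInvariants(writer);   // the segment structure must hold after every add
    }
    writer.Close();
}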
/// <throws> IOException </throws>
/// <seealso cref="IndexModifier#SetMergeFactor(int)">
/// </seealso>
public virtual int GetMergeFactor()
{
    lock (directory)
    {
        AssureOpen();
        CreateIndexWriter();
        return indexWriter.GetMergeFactor();
    }
}
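A minimal usage sketch for this accessor, assuming the surrounding Lucene.Net 2.x-era IndexModifier API; the index path below is a placeholder:

// Illustrative only: read the current merge factor and raise it before a bulk load.
// The path is a placeholder, not taken from this file.
IndexModifier modifier = new IndexModifier("/tmp/example-index", new StandardAnalyzer(), false);
int mergeFactor = modifier.GetMergeFactor(); // locks the underlying directory internally
modifier.SetMergeFactor(mergeFactor * 2);    // fewer, larger merges while bulk indexing
modifier.Close();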
private void CheckInvariants(IndexWriter writer)
{
    writer.WaitForMerges();

    int maxBufferedDocs = writer.GetMaxBufferedDocs();
    int mergeFactor = writer.GetMergeFactor();
    int maxMergeDocs = writer.GetMaxMergeDocs();

    // No more than maxBufferedDocs documents may still be buffered in RAM.
    int ramSegmentCount = writer.GetNumBufferedDocuments();
    Assert.IsTrue(ramSegmentCount < maxBufferedDocs);

    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int numSegments = 0;

    // Walk the segments from newest to oldest, grouping them into levels
    // (lowerBound, upperBound] that grow by mergeFactor; each level may hold
    // fewer than mergeFactor segments whenever a merge at that level would
    // still stay within maxMergeDocs.
    int segmentCount = writer.GetSegmentCount();
    for (int i = segmentCount - 1; i >= 0; i--)
    {
        int docCount = writer.GetDocCount(i);
        Assert.IsTrue(docCount > lowerBound);

        if (docCount <= upperBound)
        {
            numSegments++;
        }
        else
        {
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor);
            }

            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            }
            while (docCount > upperBound);

            numSegments = 1;
        }
    }
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(numSegments < mergeFactor);
    }
}
/// <summary> Tests the IndexReader.GetFieldNames implementation</summary>
/// <throws> Exception on error </throws>
public virtual void TestGetFieldNames()
{
    RAMDirectory d = new MockRAMDirectory();

    // set up writer
    IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(), true);
    AddDocumentWithFields(writer);
    writer.Close();

    // set up reader
    IndexReader reader = IndexReader.Open(d);
    System.Collections.ICollection fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    Assert.IsTrue(CollectionContains(fieldNames, "keyword"));
    Assert.IsTrue(CollectionContains(fieldNames, "text"));
    Assert.IsTrue(CollectionContains(fieldNames, "unindexed"));
    Assert.IsTrue(CollectionContains(fieldNames, "unstored"));
    reader.Close();

    // add more documents
    writer = new IndexWriter(d, new StandardAnalyzer(), false);

    // want to get some more segments here
    for (int i = 0; i < 5 * writer.GetMergeFactor(); i++)
    {
        AddDocumentWithFields(writer);
    }

    // new fields are in some different segments (we hope)
    for (int i = 0; i < 5 * writer.GetMergeFactor(); i++)
    {
        AddDocumentWithDifferentFields(writer);
    }

    // new termvector fields
    for (int i = 0; i < 5 * writer.GetMergeFactor(); i++)
    {
        AddDocumentWithTermVectorFields(writer);
    }
    writer.Close();

    // verify fields again
    reader = IndexReader.Open(d);
    fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    Assert.AreEqual(13, fieldNames.Count); // 13 distinct field names across all segments
    Assert.IsTrue(CollectionContains(fieldNames, "keyword"));
    Assert.IsTrue(CollectionContains(fieldNames, "text"));
    Assert.IsTrue(CollectionContains(fieldNames, "unindexed"));
    Assert.IsTrue(CollectionContains(fieldNames, "unstored"));
    Assert.IsTrue(CollectionContains(fieldNames, "keyword2"));
    Assert.IsTrue(CollectionContains(fieldNames, "text2"));
    Assert.IsTrue(CollectionContains(fieldNames, "unindexed2"));
    Assert.IsTrue(CollectionContains(fieldNames, "unstored2"));
    Assert.IsTrue(CollectionContains(fieldNames, "tvnot"));
    Assert.IsTrue(CollectionContains(fieldNames, "termvector"));
    Assert.IsTrue(CollectionContains(fieldNames, "tvposition"));
    Assert.IsTrue(CollectionContains(fieldNames, "tvoffset"));
    Assert.IsTrue(CollectionContains(fieldNames, "tvpositionoffset"));

    // verify that only indexed fields were returned
    fieldNames = reader.GetFieldNames(IndexReader.FieldOption.INDEXED);
    Assert.AreEqual(11, fieldNames.Count); // 6 original + the 5 termvector fields
    Assert.IsTrue(CollectionContains(fieldNames, "keyword"));
    Assert.IsTrue(CollectionContains(fieldNames, "text"));
    Assert.IsTrue(CollectionContains(fieldNames, "unstored"));
    Assert.IsTrue(CollectionContains(fieldNames, "keyword2"));
    Assert.IsTrue(CollectionContains(fieldNames, "text2"));
    Assert.IsTrue(CollectionContains(fieldNames, "unstored2"));
    Assert.IsTrue(CollectionContains(fieldNames, "tvnot"));
    Assert.IsTrue(CollectionContains(fieldNames, "termvector"));
    Assert.IsTrue(CollectionContains(fieldNames, "tvposition"));
    Assert.IsTrue(CollectionContains(fieldNames, "tvoffset"));
    Assert.IsTrue(CollectionContains(fieldNames, "tvpositionoffset"));

    // verify that only unindexed fields were returned
    fieldNames = reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED);
    Assert.AreEqual(2, fieldNames.Count); // only the two unindexed fields
    Assert.IsTrue(CollectionContains(fieldNames, "unindexed"));
    Assert.IsTrue(CollectionContains(fieldNames, "unindexed2"));

    // verify indexed term vector fields
    fieldNames = reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR);
    Assert.AreEqual(1, fieldNames.Count); // 1 field has a plain term vector (no positions or offsets)
    Assert.IsTrue(CollectionContains(fieldNames, "termvector"));

    fieldNames = reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION);
    Assert.AreEqual(1, fieldNames.Count); // only tvposition stores positions without offsets
    Assert.IsTrue(CollectionContains(fieldNames, "tvposition"));

    fieldNames = reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET);
    Assert.AreEqual(1, fieldNames.Count); // only tvoffset stores offsets without positions
    Assert.IsTrue(CollectionContains(fieldNames, "tvoffset"));

    fieldNames = reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET);
    Assert.AreEqual(1, fieldNames.Count); // only tvpositionoffset stores both positions and offsets
    Assert.IsTrue(CollectionContains(fieldNames, "tvpositionoffset"));

    reader.Close();
    d.Close();
}
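The CollectionContains helper used throughout TestGetFieldNames is defined elsewhere in the test class and is not shown in this section; a minimal sketch of what such a helper might look like (an assumption, not the file's actual implementation) follows:

// Hypothetical sketch of the CollectionContains helper referenced above; the real
// test class supplies its own version, which may differ.
private static bool CollectionContains(System.Collections.ICollection col, System.String val)
{
    foreach (System.Object item in col)
    {
        if (val.Equals(item))
        {
            return true;
        }
    }
    return false;
}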
public virtual void TestTermVectors()
{
    RAMDirectory d = new MockRAMDirectory();

    // set up writer
    IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(), true);

    // want to get some more segments here
    // new termvector fields
    for (int i = 0; i < 5 * writer.GetMergeFactor(); i++)
    {
        Document doc = new Document();
        doc.Add(new Field("tvnot", "one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
        doc.Add(new Field("termvector", "one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
        doc.Add(new Field("tvoffset", "one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_OFFSETS));
        doc.Add(new Field("tvposition", "one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS));
        doc.Add(new Field("tvpositionoffset", "one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
        writer.AddDocument(doc);
    }
    writer.Close();

    // map the first document's term vectors into per-field, frequency-sorted entries
    IndexReader reader = IndexReader.Open(d);
    FieldSortedTermVectorMapper mapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.GetTermFreqVector(0, mapper);
    System.Collections.IDictionary map = mapper.GetFieldToTerms();
    Assert.IsTrue(map != null, "map is null and it shouldn't be");
    Assert.IsTrue(map.Count == 4, "map Size: " + map.Count + " is not: " + 4); // tvnot has no term vector

    System.Collections.IDictionary set_Renamed = (System.Collections.IDictionary) map["termvector"];
    for (System.Collections.IEnumerator iterator = set_Renamed.Keys.GetEnumerator(); iterator.MoveNext(); )
    {
        TermVectorEntry entry = (TermVectorEntry) iterator.Current;
        Assert.IsTrue(entry != null, "entry is null and it shouldn't be");
        System.Console.Out.WriteLine("Entry: " + entry);
    }
}