public virtual void TestLazyFieldsAfterClose() {
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos, null);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);
    ISet<string> loadFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();
    loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
    loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);
    ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
    lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
    Document doc = reader.Doc(0, fieldSelector, null);
    Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
    IFieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");
    reader.Dispose();
    Assert.Throws<AlreadyClosedException>(() => { var value = field.StringValue(null); },
        "did not hit AlreadyClosedException as expected");
}
private void Initialize(SegmentInfo si) {
    segment = si.name;

    // Use compound file directory for some files, if it exists
    Directory cfsDir = Directory();
    if (Directory().FileExists(segment + ".cfs")) {
        cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
        cfsDir = cfsReader;
    }

    // cfsDir is the compound file if one exists, otherwise the plain multi-file directory
    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (HasDeletions(si))
        deletedDocs = new BitVector(Directory(), segment + ".del");

    // make sure that all index files have been read or are kept open
    // so that if an index update removes them we'll still have them
    freqStream = cfsDir.OpenInput(segment + ".frq");
    proxStream = cfsDir.OpenInput(segment + ".prx");
    OpenNorms(cfsDir);

    if (fieldInfos.HasVectors()) { // open term vector files only as needed
        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
    }
}
public virtual void TestLoadFirst() {
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);
    LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
    Document doc = reader.Doc(0, fieldSelector);
    Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
    int count = 0;
    System.Collections.IList l = doc.GetFields();
    for (System.Collections.IEnumerator iter = l.GetEnumerator(); iter.MoveNext();) {
        Field field = (Field) iter.Current;
        Assert.IsTrue(field != null, "field is null and it shouldn't be");
        System.String sv = field.StringValue();
        Assert.IsTrue(sv != null, "sv is null and it shouldn't be");
        count++;
    }
    Assert.IsTrue(count == 1, count + " does not equal: " + 1);
}
public override void CheckIntegrity() {
    EnsureOpen();

    // stored fields
    FieldsReader.CheckIntegrity();

    // term vectors
    TermVectorsReader termVectorsReader = TermVectorsReader;
    if (termVectorsReader != null) {
        termVectorsReader.CheckIntegrity();
    }

    // terms/postings
    if (core.fields != null) {
        core.fields.CheckIntegrity();
    }

    // norms
    if (core.normsProducer != null) {
        core.normsProducer.CheckIntegrity();
    }

    // docvalues
    if (dvProducers != null) {
        foreach (DocValuesProducer producer in dvProducers) {
            producer.CheckIntegrity();
        }
    }
}
public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer)
    : base(name, store, index, termVector) {
    InitBlock(enclosingInstance);
    this.toRead = toRead;
    this.pointer = pointer;
    lazy = true;
}
public virtual void TestLazyFields() {
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos, null);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);
    ISet<string> loadFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();
    loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
    loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);
    ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();
    //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
    lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
    Document doc = reader.Doc(0, fieldSelector, null);
    Assert.IsTrue(doc != null, "doc is null and it shouldn't be");

    IFieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");
    System.String value_Renamed = field.StringValue(null);
    Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
    Assert.IsTrue(value_Renamed.Equals(DocHelper.LAZY_FIELD_TEXT) == true, value_Renamed + " is not equal to " + DocHelper.LAZY_FIELD_TEXT);

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy == false, "Field is lazy and it should not be");

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy == false, "Field is lazy and it should not be");
    Assert.IsTrue(field.StringValue(null).Equals(DocHelper.FIELD_UTF1_TEXT) == true, field.StringValue(null) + " is not equal to " + DocHelper.FIELD_UTF1_TEXT);

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy == true, "field is not lazy and it should be");
    Assert.IsTrue(field.StringValue(null).Equals(DocHelper.FIELD_UTF2_TEXT) == true, field.StringValue(null) + " is not equal to " + DocHelper.FIELD_UTF2_TEXT);

    field = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.StringValue(null) == null, "stringValue isn't null for lazy binary field");

    byte[] bytes = field.GetBinaryValue(null);
    Assert.IsTrue(bytes != null, "bytes is null and it shouldn't be");
    Assert.IsTrue(DocHelper.LAZY_FIELD_BINARY_BYTES.Length == bytes.Length, "binary lengths differ");
    for (int i = 0; i < bytes.Length; i++) {
        Assert.IsTrue(bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i], "byte[" + i + "] is mismatched");
    }
}
public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, bool isBinary)
    : base(name, store, index, termVector) {
    InitBlock(enclosingInstance);
    this.toRead = toRead;
    this.pointer = pointer;
    this.isBinary = isBinary;
    if (isBinary) {
        binaryLength = toRead;
    }
    lazy = true;
}
public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer, bool isBinary, bool isCompressed)
    : base(name, store, Field.Index.NO, Field.TermVector.NO) {
    InitBlock(enclosingInstance);
    this.toRead = toRead;
    this.pointer = pointer;
    this.internalIsBinary = isBinary;
    if (isBinary) {
        internalBinaryLength = toRead;
    }
    lazy = true;
    this.isCompressed = isCompressed;
}
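// The LazyField constructors above only record where a field's bytes live
// ("pointer") and how many bytes to read ("toRead"); the value itself is not
// fetched until first access. Below is a minimal, self-contained sketch of
// that deferred-read pattern. The LazyString type and its file-based layout
// are hypothetical illustrations, not the actual Lucene.NET LazyField code.
using System;
using System.IO;
using System.Text;

class LazyString {
    private readonly string path;   // file holding the stored bytes (assumed layout)
    private readonly long pointer;  // offset of this field's bytes within the file
    private readonly int toRead;    // number of bytes to read
    private string cached;          // filled in on first access

    public LazyString(string path, long pointer, int toRead) {
        this.path = path;
        this.pointer = pointer;
        this.toRead = toRead;
    }

    public string Value {
        get {
            if (cached == null) {
                using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read)) {
                    fs.Seek(pointer, SeekOrigin.Begin); // jump straight to the field
                    byte[] buf = new byte[toRead];
                    int off = 0;
                    while (off < toRead) {
                        int n = fs.Read(buf, off, toRead - off);
                        if (n <= 0) throw new EndOfStreamException("unexpected end of stored fields file");
                        off += n;
                    }
                    cached = Encoding.UTF8.GetString(buf);
                }
            }
            return cached;
        }
    }
}
// As in the tests above, reading Value after the backing file is gone fails,
// which mirrors the AlreadyClosedException that TestLazyFieldsAfterClose expects.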
public virtual void TestLoadSize() {
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
    Document doc;
    doc = reader.Doc(0, new AnonymousClassFieldSelector(this));
    Fieldable f1 = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
    Fieldable f3 = doc.GetFieldable(DocHelper.TEXT_FIELD_3_KEY);
    Fieldable fb = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
    Assert.IsTrue(f1.IsBinary());
    Assert.IsTrue(!f3.IsBinary());
    Assert.IsTrue(fb.IsBinary());
    AssertSizeEquals(2 * DocHelper.FIELD_1_TEXT.Length, f1.BinaryValue());
    Assert.AreEqual(DocHelper.FIELD_3_TEXT, f3.StringValue());
    AssertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.Length, fb.BinaryValue());
    reader.Close();
}
public virtual void Test() {
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);
    Document doc = reader.Doc(0, null);
    Assert.IsTrue(doc != null);
    Assert.IsTrue(doc.GetField(DocHelper.TEXT_FIELD_1_KEY) != null);

    Fieldable field = doc.GetField(DocHelper.TEXT_FIELD_2_KEY);
    Assert.IsTrue(field != null);
    Assert.IsTrue(field.IsTermVectorStored() == true);
    Assert.IsTrue(field.IsStoreOffsetWithTermVector() == true);
    Assert.IsTrue(field.IsStorePositionWithTermVector() == true);
    Assert.IsTrue(field.GetOmitNorms() == false);
    Assert.IsTrue(field.GetOmitTf() == false);

    field = doc.GetField(DocHelper.TEXT_FIELD_3_KEY);
    Assert.IsTrue(field != null);
    Assert.IsTrue(field.IsTermVectorStored() == false);
    Assert.IsTrue(field.IsStoreOffsetWithTermVector() == false);
    Assert.IsTrue(field.IsStorePositionWithTermVector() == false);
    Assert.IsTrue(field.GetOmitNorms() == true);
    Assert.IsTrue(field.GetOmitTf() == false);

    field = doc.GetField(DocHelper.NO_TF_KEY);
    Assert.IsTrue(field != null);
    Assert.IsTrue(field.IsTermVectorStored() == false);
    Assert.IsTrue(field.IsStoreOffsetWithTermVector() == false);
    Assert.IsTrue(field.IsStorePositionWithTermVector() == false);
    Assert.IsTrue(field.GetOmitNorms() == false);
    Assert.IsTrue(field.GetOmitTf() == true);
    reader.Close();
}
public virtual void Test() {
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    try {
        FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
        Assert.IsTrue(reader != null);
        Assert.IsTrue(reader.Size() == 1);
        Document doc = reader.Doc(0);
        Assert.IsTrue(doc != null);
        Assert.IsTrue(doc.GetField("textField1") != null);
        Field field = doc.GetField("textField2");
        Assert.IsTrue(field != null);
        Assert.IsTrue(field.IsTermVectorStored() == true);
        reader.Close();
    } catch (System.IO.IOException e) {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.Fail("unexpected IOException");
    }
}
public virtual void TestLazyFieldsAfterClose() {
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);
    System.Collections.Hashtable loadFieldNames = new System.Collections.Hashtable();
    SupportClass.CollectionsHelper.AddIfNotContains(loadFieldNames, DocHelper.TEXT_FIELD_1_KEY);
    SupportClass.CollectionsHelper.AddIfNotContains(loadFieldNames, DocHelper.TEXT_FIELD_UTF1_KEY);
    System.Collections.Hashtable lazyFieldNames = new System.Collections.Hashtable();
    SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LARGE_LAZY_FIELD_KEY);
    SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LAZY_FIELD_KEY);
    SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LAZY_FIELD_BINARY_KEY);
    SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.TEXT_FIELD_UTF2_KEY);
    SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
    Document doc = reader.Doc(0, fieldSelector);
    Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
    Fieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy(), "field is not lazy and it should be");
    reader.Close();
    try {
        field.StringValue();
        Assert.Fail("did not hit AlreadyClosedException as expected");
    } catch (AlreadyClosedException) {
        // expected
    }
}
internal void OpenDocStores(SegmentInfo si) {
    lock (this) {
        System.Diagnostics.Debug.Assert(si.name.Equals(segment));

        if (fieldsReaderOrig == null) {
            Directory storeDir;
            if (si.GetDocStoreOffset() != -1) {
                if (si.GetDocStoreIsCompoundFile()) {
                    System.Diagnostics.Debug.Assert(storeCFSReader == null);
                    storeCFSReader = new CompoundFileReader(dir, si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                    storeDir = storeCFSReader;
                    System.Diagnostics.Debug.Assert(storeDir != null);
                } else {
                    storeDir = dir;
                    System.Diagnostics.Debug.Assert(storeDir != null);
                }
            } else if (si.GetUseCompoundFile()) {
                // In some cases, we were originally opened when CFS
                // was not used, but then we are asked to open doc
                // stores after the segment has switched to CFS
                if (cfsReader == null) {
                    cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                }
                storeDir = cfsReader;
                System.Diagnostics.Debug.Assert(storeDir != null);
            } else {
                storeDir = dir;
                System.Diagnostics.Debug.Assert(storeDir != null);
            }

            System.String storesSegment;
            if (si.GetDocStoreOffset() != -1) {
                storesSegment = si.GetDocStoreSegment();
            } else {
                storesSegment = segment;
            }

            fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

            // Verify two sources of "maxDoc" agree:
            if (si.GetDocStoreOffset() == -1 && fieldsReaderOrig.Size() != si.docCount) {
                throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.Size() + " but segmentInfo shows " + si.docCount);
            }

            if (fieldInfos.HasVectors()) { // open term vector files only as needed
                termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
            }
        }
    }
}
public virtual void TestLazyFields() {
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);
    System.Collections.Hashtable loadFieldNames = new System.Collections.Hashtable();
    SupportClass.CollectionsHelper.AddIfNotContains(loadFieldNames, DocHelper.TEXT_FIELD_1_KEY);
    SupportClass.CollectionsHelper.AddIfNotContains(loadFieldNames, DocHelper.TEXT_FIELD_UTF1_KEY);
    System.Collections.Hashtable lazyFieldNames = new System.Collections.Hashtable();
    //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
    SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LARGE_LAZY_FIELD_KEY);
    SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LAZY_FIELD_KEY);
    SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LAZY_FIELD_BINARY_KEY);
    SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.TEXT_FIELD_UTF2_KEY);
    SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
    Document doc = reader.Doc(0, fieldSelector);
    Assert.IsTrue(doc != null, "doc is null and it shouldn't be");

    Fieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy(), "field is not lazy and it should be");
    System.String value_Renamed = field.StringValue();
    Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
    Assert.IsTrue(value_Renamed.Equals(DocHelper.LAZY_FIELD_TEXT) == true, value_Renamed + " is not equal to " + DocHelper.LAZY_FIELD_TEXT);

    field = doc.GetFieldable(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy(), "field is not lazy and it should be");
    Assert.IsTrue(field.BinaryValue() == null, "binaryValue isn't null for lazy string field");
    value_Renamed = field.StringValue();
    Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
    Assert.IsTrue(value_Renamed.Equals(DocHelper.FIELD_2_COMPRESSED_TEXT) == true, value_Renamed + " is not equal to " + DocHelper.FIELD_2_COMPRESSED_TEXT);

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy() == false, "Field is lazy and it should not be");

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy() == false, "Field is lazy and it should not be");
    Assert.IsTrue(field.StringValue().Equals(DocHelper.FIELD_UTF1_TEXT) == true, field.StringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT);

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy() == true, "field is not lazy and it should be");
    Assert.IsTrue(field.StringValue().Equals(DocHelper.FIELD_UTF2_TEXT) == true, field.StringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT);

    field = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.StringValue() == null, "stringValue isn't null for lazy binary field");

    byte[] bytes = field.BinaryValue();
    Assert.IsTrue(bytes != null, "bytes is null and it shouldn't be");
    Assert.IsTrue(DocHelper.LAZY_FIELD_BINARY_BYTES.Length == bytes.Length, "binary lengths differ");
    for (int i = 0; i < bytes.Length; i++) {
        Assert.IsTrue(bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i], "byte[" + i + "] is mismatched");
    }
}
public virtual void Test() {
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);
    Document doc = reader.Doc(0, null);
    Assert.IsTrue(doc != null);
    Assert.IsTrue(doc.GetField(DocHelper.TEXT_FIELD_1_KEY) != null);

    IFieldable field = doc.GetField(DocHelper.TEXT_FIELD_2_KEY);
    Assert.IsTrue(field != null);
    Assert.IsTrue(field.IsTermVectorStored == true);
    Assert.IsTrue(field.IsStoreOffsetWithTermVector == true);
    Assert.IsTrue(field.IsStorePositionWithTermVector == true);
    Assert.IsTrue(field.OmitNorms == false);
    Assert.IsTrue(field.OmitTermFreqAndPositions == false);

    field = doc.GetField(DocHelper.TEXT_FIELD_3_KEY);
    Assert.IsTrue(field != null);
    Assert.IsTrue(field.IsTermVectorStored == false);
    Assert.IsTrue(field.IsStoreOffsetWithTermVector == false);
    Assert.IsTrue(field.IsStorePositionWithTermVector == false);
    Assert.IsTrue(field.OmitNorms == true);
    Assert.IsTrue(field.OmitTermFreqAndPositions == false);

    field = doc.GetField(DocHelper.NO_TF_KEY);
    Assert.IsTrue(field != null);
    Assert.IsTrue(field.IsTermVectorStored == false);
    Assert.IsTrue(field.IsStoreOffsetWithTermVector == false);
    Assert.IsTrue(field.IsStorePositionWithTermVector == false);
    Assert.IsTrue(field.OmitNorms == false);
    Assert.IsTrue(field.OmitTermFreqAndPositions == true);
    reader.Dispose();
}
private void InitBlock(FieldsReader enclosingInstance) { this.enclosingInstance = enclosingInstance; }
private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) {
    int docCount = 0;
    int maxDoc = reader.MaxDoc();
    if (matchingFieldsReader != null) {
        // We can bulk-copy because the fieldInfos are "congruent"
        for (int j = 0; j < maxDoc;) {
            if (reader.IsDeleted(j)) {
                // skip deleted docs
                ++j;
                continue;
            }
            // We can optimize this case (doing a bulk byte copy) since the field
            // numbers are identical
            int start = j, numDocs = 0;
            do {
                j++;
                numDocs++;
                if (j >= maxDoc)
                    break;
                if (reader.IsDeleted(j)) {
                    j++;
                    break;
                }
            } while (numDocs < MAX_RAW_MERGE_DOCS);
            IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
            fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
            docCount += numDocs;
            checkAbort.Work(300 * numDocs);
        }
    } else {
        for (int j = 0; j < maxDoc; j++) {
            if (reader.IsDeleted(j)) {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to doc then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Document doc = reader.Document(j, fieldSelectorMerge);
            fieldsWriter.AddDocument(doc);
            docCount++;
            checkAbort.Work(300);
        }
    }
    return docCount;
}
public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer, bool isBinary)
    : base(name, store, Field.Index.NO, Field.TermVector.NO) {
    InitBlock(enclosingInstance);
    this.toRead = toRead;
    this.pointer = pointer;
    this.isBinary = isBinary;
    if (isBinary)
        binaryLength = toRead;
    lazy = true;
}
public virtual void TestLazyPerformance() {
    System.String tmpIODir = SupportClass.AppSettings.Get("tempDir", "");
    System.String userName = System.Environment.UserName;
    System.String path = tmpIODir + System.IO.Path.DirectorySeparatorChar.ToString() + "lazyDir" + userName;
    System.IO.FileInfo file = new System.IO.FileInfo(path);
    _TestUtil.RmDir(file);
    FSDirectory tmpDir = FSDirectory.Open(file);
    Assert.IsTrue(tmpDir != null);

    IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetUseCompoundFile(false);
    writer.AddDocument(testDoc);
    writer.Close();

    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader;
    long lazyTime = 0;
    long regularTime = 0;
    int length = 50;
    System.Collections.Hashtable lazyFieldNames = new System.Collections.Hashtable();
    SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LARGE_LAZY_FIELD_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(new System.Collections.Hashtable(), lazyFieldNames);

    for (int i = 0; i < length; i++) {
        reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
        Assert.IsTrue(reader != null);
        Assert.IsTrue(reader.Size() == 1);

        Document doc;
        doc = reader.Doc(0, null); // Load all of them
        Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
        Fieldable field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
        Assert.IsTrue(field.IsLazy() == false, "field is lazy");
        System.String value_Renamed;
        long start;
        long finish;
        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        // On my machine this was always 0ms.
        value_Renamed = field.StringValue();
        finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
        Assert.IsTrue(field != null, "field is null and it shouldn't be");
        regularTime += (finish - start);
        reader.Close();
        reader = null;
        doc = null;
        // Hmmm, are we still in cache???
        System.GC.Collect();

        reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
        doc = reader.Doc(0, fieldSelector);
        field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
        Assert.IsTrue(field.IsLazy() == true, "field is not lazy");
        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        // On my machine this took around 50 - 70ms
        value_Renamed = field.StringValue();
        finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
        lazyTime += (finish - start);
        reader.Close();
    }
    System.Console.Out.WriteLine("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
    System.Console.Out.WriteLine("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
}
private void Initialize(SegmentInfo si) {
    segment = si.name;
    this.si = si;

    bool success = false;
    try {
        // Use compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile()) {
            cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
            cfsDir = cfsReader;
        }

        // cfsDir is the compound file if one exists, otherwise the plain multi-file directory
        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
        fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

        // Verify two sources of "maxDoc" agree:
        if (fieldsReader.Size() != si.docCount) {
            throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
        }

        tis = new TermInfosReader(cfsDir, segment, fieldInfos);

        // NOTE: the bitvector is stored using the regular directory, not cfs
        if (HasDeletions(si)) {
            deletedDocs = new BitVector(Directory(), si.GetDelFileName());

            // Verify # deletes does not exceed maxDoc for this segment:
            if (deletedDocs.Count() > MaxDoc()) {
                throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
            }
        }

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq");
        proxStream = cfsDir.OpenInput(segment + ".prx");
        OpenNorms(cfsDir);

        if (fieldInfos.HasVectors()) { // open term vector files only as needed
            termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
        }

        success = true;
    } finally {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success) {
            DoClose();
        }
    }
}
public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer)
    : base(name, store, index, termVector) {
    InitBlock(enclosingInstance);
    this.toRead = toRead;
    this.pointer = pointer;
    lazy = true;
}
private void assertCompressedFields29(Directory dir, bool shouldStillBeCompressed) {
    int count = 0;
    // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
    int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.Length * 2;
    int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.Length;

    IndexReader reader = IndexReader.Open(dir, true);
    try {
        // look into sub readers and check if raw merge is on/off
        var readers = new System.Collections.Generic.List<IndexReader>();
        ReaderUtil.GatherSubReaders(readers, reader);
        foreach (IndexReader ir in readers) {
            FieldsReader fr = ((SegmentReader) ir).GetFieldsReader();
            Assert.IsTrue(shouldStillBeCompressed != fr.CanReadRawDocs(), "for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index");
        }

        // test that decompression works correctly
        for (int i = 0; i < reader.MaxDoc; i++) {
            if (!reader.IsDeleted(i)) {
                Document d = reader.Document(i);
                if (d.Get("content3") != null) {
                    continue;
                }
                count++;
                IFieldable compressed = d.GetFieldable("compressed");
                if (int.Parse(d.Get("id")) % 2 == 0) {
                    Assert.IsFalse(compressed.IsBinary);
                    Assert.AreEqual(TEXT_TO_COMPRESS, compressed.StringValue, "incorrectly decompressed string");
                } else {
                    Assert.IsTrue(compressed.IsBinary);
                    Assert.IsTrue(BINARY_TO_COMPRESS.SequenceEqual(compressed.GetBinaryValue()), "incorrectly decompressed binary");
                }
            }
        }

        // check if field was decompressed after optimize
        for (int i = 0; i < reader.MaxDoc; i++) {
            if (!reader.IsDeleted(i)) {
                Document d = reader.Document(i, new AnonymousFieldSelector());
                if (d.Get("content3") != null) {
                    continue;
                }
                count++;
                // read the size from the binary value using BinaryReader (this prevents us from doing the shift ops ourselves):
                // ugh, Java uses Big-Endian streams, so we need to do it manually.
                byte[] encodedSize = d.GetFieldable("compressed").GetBinaryValue().Take(4).Reverse().ToArray();
                int actualSize = BitConverter.ToInt32(encodedSize, 0);
                int compressedSize = int.Parse(d.Get("compressedSize"));
                bool binary = int.Parse(d.Get("id")) % 2 > 0;
                int shouldSize = shouldStillBeCompressed ? compressedSize : (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
                Assert.AreEqual(shouldSize, actualSize, "size incorrect");
                if (!shouldStillBeCompressed) {
                    Assert.IsFalse(compressedSize == actualSize, "uncompressed field should have another size than recorded in index");
                }
            }
        }
        Assert.AreEqual(34 * 2, count, "correct number of tests");
    } finally {
        reader.Dispose();
    }
}
public virtual void TestLazyFieldsAfterClose() {
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);
    ISet<string> loadFieldNames = Support.Compatibility.SetFactory.GetSet<string>();
    loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
    loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);
    ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.GetSet<string>();
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
    lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
    Document doc = reader.Doc(0, fieldSelector);
    Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
    IFieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");
    reader.Dispose();
    Assert.Throws<AlreadyClosedException>(() => { var value = field.StringValue; },
        "did not hit AlreadyClosedException as expected");
}
public virtual void TestLazyPerformance() {
    System.String tmpIODir = AppSettings.Get("tempDir", "");
    System.String userName = System.Environment.UserName;
    System.String path = tmpIODir + System.IO.Path.DirectorySeparatorChar.ToString() + "lazyDir" + userName;
    System.IO.DirectoryInfo file = new System.IO.DirectoryInfo(path);
    _TestUtil.RmDir(file);
    FSDirectory tmpDir = FSDirectory.Open(file);
    Assert.IsTrue(tmpDir != null);

    IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.UseCompoundFile = false;
    writer.AddDocument(testDoc);
    writer.Close();

    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader;
    long lazyTime = 0;
    long regularTime = 0;
    int length = 50;
    ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.GetSet<string>();
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Support.Compatibility.SetFactory.GetSet<string>(), lazyFieldNames);

    for (int i = 0; i < length; i++) {
        reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
        Assert.IsTrue(reader != null);
        Assert.IsTrue(reader.Size() == 1);

        Document doc;
        doc = reader.Doc(0, null); // Load all of them
        Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
        IFieldable field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
        Assert.IsTrue(field.IsLazy == false, "field is lazy");
        System.String value_Renamed;
        long start;
        long finish;
        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        // On my machine this was always 0ms.
        value_Renamed = field.StringValue;
        finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
        Assert.IsTrue(field != null, "field is null and it shouldn't be");
        regularTime += (finish - start);
        reader.Dispose();
        reader = null;
        doc = null;
        // Hmmm, are we still in cache???
        System.GC.Collect();

        reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
        doc = reader.Doc(0, fieldSelector);
        field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
        Assert.IsTrue(field.IsLazy == true, "field is not lazy");
        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        // On my machine this took around 50 - 70ms
        value_Renamed = field.StringValue;
        finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
        lazyTime += (finish - start);
        reader.Dispose();
    }
    System.Console.Out.WriteLine("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
    System.Console.Out.WriteLine("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
}
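// The two TestLazyPerformance variants above time field loads with
// DateTime.Now.Ticks, which has coarse resolution and is not monotonic. A
// minimal sketch of the same measurement using System.Diagnostics.Stopwatch
// follows. The LazyTiming helper is hypothetical (not part of the test
// class); it assumes a Lucene.Net Document whose GetFieldable(key).StringValue
// property forces the lazy load, as in the loop above.
using System;
using System.Diagnostics;
using Lucene.Net.Documents;

static class LazyTiming {
    // Returns elapsed milliseconds for reading one stored field's value.
    internal static long TimeRead(Document doc, string key) {
        Stopwatch sw = Stopwatch.StartNew(); // monotonic, high-resolution timer
        string value = doc.GetFieldable(key).StringValue; // triggers the (possibly lazy) load
        sw.Stop();
        if (value == null)
            throw new InvalidOperationException("value is null and it shouldn't be");
        return sw.ElapsedMilliseconds;
    }
}
// Inside the loop, the DateTime arithmetic would then collapse to:
//   regularTime += LazyTiming.TimeRead(doc, DocHelper.LARGE_LAZY_FIELD_KEY);
//   lazyTime += LazyTiming.TimeRead(doc, DocHelper.LARGE_LAZY_FIELD_KEY);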
public virtual void Test() {
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, segmentName, fieldInfos);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);
    Lucene.Net.Documents.Document doc = reader.Doc(0, null);
    Assert.IsTrue(doc != null);
    Assert.IsTrue(doc.GetField(DocHelper.TEXT_FIELD_1_KEY) != null);

    Field field = doc.GetField(DocHelper.TEXT_FIELD_2_KEY);
    Assert.IsTrue(field != null);
    Assert.IsTrue(field.IsTermVectorStored() == true);
    Assert.IsTrue(field.IsStoreOffsetWithTermVector() == true);
    Assert.IsTrue(field.IsStorePositionWithTermVector() == true);
    Assert.IsTrue(field.GetOmitNorms() == false);

    field = doc.GetField(DocHelper.TEXT_FIELD_3_KEY);
    Assert.IsTrue(field != null);
    Assert.IsTrue(field.IsTermVectorStored() == false);
    Assert.IsTrue(field.IsStoreOffsetWithTermVector() == false);
    Assert.IsTrue(field.IsStorePositionWithTermVector() == false);
    Assert.IsTrue(field.GetOmitNorms() == true);
    reader.Close();
}
public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, bool isBinary, bool isCompressed)
    : base(name, store, index, termVector) {
    InitBlock(enclosingInstance);
    this.toRead = toRead;
    this.pointer = pointer;
    this.internalIsBinary = isBinary;
    if (isBinary)
        internalBinaryLength = toRead;
    lazy = true;
    this.isCompressed = isCompressed;
}
private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) {
    int maxDoc = reader.MaxDoc();
    int docCount = 0;
    if (matchingFieldsReader != null) {
        // We can bulk-copy because the fieldInfos are "congruent"
        while (docCount < maxDoc) {
            int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
            fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
            docCount += len;
            checkAbort.Work(300 * len);
        }
    } else {
        for (; docCount < maxDoc; docCount++) {
            // NOTE: it's very important to first assign to doc then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Document doc = reader.Document(docCount, fieldSelectorMerge);
            fieldsWriter.AddDocument(doc);
            checkAbort.Work(300);
        }
    }
    return docCount;
}
/// <summary> </summary>
/// <returns> The number of documents in all of the readers </returns>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
private int MergeFields() {
    if (!mergeDocStores) {
        // When we are not merging by doc stores, that means
        // all segments were written as part of a single
        // autoCommit=false IndexWriter session, so their field
        // name -> number mappings are the same. So, we start
        // with the fieldInfos of the last segment in this
        // case, to keep that numbering.
        SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
        fieldInfos = (FieldInfos) sr.fieldInfos.Clone();
    } else {
        fieldInfos = new FieldInfos(); // merge field names
    }

    for (int i = 0; i < readers.Count; i++) {
        IndexReader reader = (IndexReader) readers[i];
        if (reader is SegmentReader) {
            SegmentReader segmentReader = (SegmentReader) reader;
            for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++) {
                FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
                fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTf);
            }
        } else {
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false);
            fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
        }
    }
    fieldInfos.Write(directory, segment + ".fnm");

    int docCount = 0;
    SetMatchingSegmentReaders();

    if (mergeDocStores) {
        // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
        // in merge mode, we use this FieldSelector
        FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

        // merge field values
        FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
        try {
            for (int i = 0; i < readers.Count; i++) {
                IndexReader reader = (IndexReader) readers[i];
                SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
                FieldsReader matchingFieldsReader;
                bool hasMatchingReader;
                if (matchingSegmentReader != null) {
                    FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                    if (fieldsReader != null && !fieldsReader.CanReadRawDocs()) {
                        matchingFieldsReader = null;
                        hasMatchingReader = false;
                    } else {
                        matchingFieldsReader = fieldsReader;
                        hasMatchingReader = true;
                    }
                } else {
                    hasMatchingReader = false;
                    matchingFieldsReader = null;
                }
                int maxDoc = reader.MaxDoc();
                bool hasDeletions = reader.HasDeletions();
                for (int j = 0; j < maxDoc;) {
                    if (!hasDeletions || !reader.IsDeleted(j)) { // skip deleted docs
                        if (hasMatchingReader) {
                            // We can optimize this case (doing a bulk
                            // byte copy) since the field numbers are
                            // identical
                            int start = j;
                            int numDocs = 0;
                            do {
                                j++;
                                numDocs++;
                                if (j >= maxDoc) {
                                    break;
                                }
                                if (hasDeletions && matchingSegmentReader.IsDeleted(j)) {
                                    j++;
                                    break;
                                }
                            } while (numDocs < MAX_RAW_MERGE_DOCS);
                            IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                            fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                            docCount += numDocs;
                            if (checkAbort != null) {
                                checkAbort.Work(300 * numDocs);
                            }
                        } else {
                            // NOTE: it's very important to first assign
                            // to doc then pass it to
                            // termVectorsWriter.addAllDocVectors; see
                            // LUCENE-1282
                            Document doc = reader.Document(j, fieldSelectorMerge);
                            fieldsWriter.AddDocument(doc);
                            j++;
                            docCount++;
                            if (checkAbort != null) {
                                checkAbort.Work(300);
                            }
                        }
                    } else {
                        j++;
                    }
                }
            }
        } finally {
            fieldsWriter.Close();
        }

        long fdxFileLength = directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
        // {{dougsale-2.4.0}
        // this shouldn't be a problem for us - if it is,
        // then it's not a JRE bug...
        //if (4+docCount*8 != fdxFileLength)
        //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
        //  // we detect that the bug has struck, here, and
        //  // throw an exception to prevent the corruption from
        //  // entering the index. See LUCENE-1282 for
        //  // details.
        //  throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + "; now aborting this merge to prevent index corruption");
    } else {
        // If we are skipping the doc stores, that means there
        // are no deletions in any of these segments, so we
        // just sum numDocs() of each segment to get total docCount
        for (int i = 0; i < readers.Count; i++) {
            docCount += ((IndexReader) readers[i]).NumDocs();
        }
    }
    return docCount;
}
public virtual void TestLoadSize() {
    FieldsReader reader = new FieldsReader(dir, segmentName, fieldInfos);
    Lucene.Net.Documents.Document doc;
    doc = reader.Doc(0, new AnonymousClassFieldSelector(this));
    Fieldable f1 = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
    Fieldable f3 = doc.GetFieldable(DocHelper.TEXT_FIELD_3_KEY);
    Fieldable fb = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
    Assert.IsTrue(f1.IsBinary());
    Assert.IsTrue(!f3.IsBinary());
    Assert.IsTrue(fb.IsBinary());
    AssertSizeEquals(2 * DocHelper.FIELD_1_TEXT.Length, f1.BinaryValue());
    Assert.AreEqual(DocHelper.FIELD_3_TEXT, f3.StringValue());
    AssertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.Length, fb.BinaryValue());
    reader.Close();
}
private int CopyFieldsWithDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) {
    int docCount = 0;
    int maxDoc = reader.MaxDoc;
    if (matchingFieldsReader != null) {
        // We can bulk-copy because the fieldInfos are "congruent"
        for (int j = 0; j < maxDoc;) {
            if (reader.IsDeleted(j)) {
                // skip deleted docs
                ++j;
                continue;
            }
            // We can optimize this case (doing a bulk byte copy) since the field
            // numbers are identical
            int start = j, numDocs = 0;
            do {
                j++;
                numDocs++;
                if (j >= maxDoc) {
                    break;
                }
                if (reader.IsDeleted(j)) {
                    j++;
                    break;
                }
            } while (numDocs < MAX_RAW_MERGE_DOCS);
            IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
            fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
            docCount += numDocs;
            checkAbort.Work(300 * numDocs);
        }
    } else {
        for (int j = 0; j < maxDoc; j++) {
            if (reader.IsDeleted(j)) {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to doc then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Document doc = reader.Document(j);
            fieldsWriter.AddDocument(doc);
            docCount++;
            checkAbort.Work(300);
        }
    }
    return docCount;
}
private int CopyFieldsNoDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) {
    int maxDoc = reader.MaxDoc;
    int docCount = 0;
    if (matchingFieldsReader != null) {
        // We can bulk-copy because the fieldInfos are "congruent"
        while (docCount < maxDoc) {
            int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
            fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
            docCount += len;
            checkAbort.Work(300 * len);
        }
    } else {
        for (; docCount < maxDoc; docCount++) {
            // NOTE: it's very important to first assign to doc then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Document doc = reader.Document(docCount);
            fieldsWriter.AddDocument(doc);
            checkAbort.Work(300);
        }
    }
    return docCount;
}
public virtual void TestLazyFields() {
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);
    ISet<string> loadFieldNames = Support.Compatibility.SetFactory.GetSet<string>();
    loadFieldNames.Add(DocHelper.TEXT_FIELD_1_KEY);
    loadFieldNames.Add(DocHelper.TEXT_FIELD_UTF1_KEY);
    ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.GetSet<string>();
    //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_KEY);
    lazyFieldNames.Add(DocHelper.LAZY_FIELD_BINARY_KEY);
    lazyFieldNames.Add(DocHelper.TEXT_FIELD_UTF2_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
    Document doc = reader.Doc(0, fieldSelector);
    Assert.IsTrue(doc != null, "doc is null and it shouldn't be");

    IFieldable field = doc.GetFieldable(DocHelper.LAZY_FIELD_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy, "field is not lazy and it should be");
    System.String value_Renamed = field.StringValue;
    Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
    Assert.IsTrue(value_Renamed.Equals(DocHelper.LAZY_FIELD_TEXT) == true, value_Renamed + " is not equal to " + DocHelper.LAZY_FIELD_TEXT);

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy == false, "Field is lazy and it should not be");

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy == false, "Field is lazy and it should not be");
    Assert.IsTrue(field.StringValue.Equals(DocHelper.FIELD_UTF1_TEXT) == true, field.StringValue + " is not equal to " + DocHelper.FIELD_UTF1_TEXT);

    field = doc.GetFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.IsLazy == true, "field is not lazy and it should be");
    Assert.IsTrue(field.StringValue.Equals(DocHelper.FIELD_UTF2_TEXT) == true, field.StringValue + " is not equal to " + DocHelper.FIELD_UTF2_TEXT);

    field = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
    Assert.IsTrue(field != null, "field is null and it shouldn't be");
    Assert.IsTrue(field.StringValue == null, "stringValue isn't null for lazy binary field");

    byte[] bytes = field.GetBinaryValue();
    Assert.IsTrue(bytes != null, "bytes is null and it shouldn't be");
    Assert.IsTrue(DocHelper.LAZY_FIELD_BINARY_BYTES.Length == bytes.Length, "binary lengths differ");
    for (int i = 0; i < bytes.Length; i++) {
        Assert.IsTrue(bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i], "byte[" + i + "] is mismatched");
    }
}
public virtual void TestLoadFirst() {
    Assert.IsTrue(dir != null);
    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
    Assert.IsTrue(reader != null);
    Assert.IsTrue(reader.Size() == 1);
    LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
    Document doc = reader.Doc(0, fieldSelector);
    Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
    int count = 0;
    var l = doc.GetFields();
    for (System.Collections.IEnumerator iter = l.GetEnumerator(); iter.MoveNext();) {
        Field field = (Field) iter.Current;
        Assert.IsTrue(field != null, "field is null and it shouldn't be");
        System.String sv = field.StringValue;
        Assert.IsTrue(sv != null, "sv is null and it shouldn't be");
        count++;
    }
    Assert.IsTrue(count == 1, count + " does not equal: " + 1);
}
private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores) {
    segment = si.name;
    this.si = si;
    this.readBufferSize = readBufferSize;

    bool success = false;
    try {
        // Use compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile()) {
            cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            cfsDir = cfsReader;
        }

        Directory storeDir;
        if (doOpenStores) {
            if (si.GetDocStoreOffset() != -1) {
                if (si.GetDocStoreIsCompoundFile()) {
                    storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                    storeDir = storeCFSReader;
                } else {
                    storeDir = Directory();
                }
            } else {
                storeDir = cfsDir;
            }
        } else {
            storeDir = null;
        }

        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

        bool anyProx = false;
        int numFields = fieldInfos.Size();
        for (int i = 0; !anyProx && i < numFields; i++) {
            if (!fieldInfos.FieldInfo(i).omitTf) {
                anyProx = true;
            }
        }

        System.String fieldsSegment;
        if (si.GetDocStoreOffset() != -1) {
            fieldsSegment = si.GetDocStoreSegment();
        } else {
            fieldsSegment = segment;
        }

        if (doOpenStores) {
            fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

            // Verify two sources of "maxDoc" agree:
            if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount) {
                throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
            }
        }

        tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

        LoadDeletedDocs();

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
        if (anyProx) {
            proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
        }
        OpenNorms(cfsDir, readBufferSize);

        if (doOpenStores && fieldInfos.HasVectors()) { // open term vector files only as needed
            System.String vectorsSegment;
            if (si.GetDocStoreOffset() != -1) {
                vectorsSegment = si.GetDocStoreSegment();
            } else {
                vectorsSegment = segment;
            }
            termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
        }

        success = true;
    } finally {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success) {
            DoClose();
        }
    }
}
public virtual void TestLoadSize() {
    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
    Document doc;
    doc = reader.Doc(0, new AnonymousClassFieldSelector(this));
    IFieldable f1 = doc.GetFieldable(DocHelper.TEXT_FIELD_1_KEY);
    IFieldable f3 = doc.GetFieldable(DocHelper.TEXT_FIELD_3_KEY);
    IFieldable fb = doc.GetFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
    Assert.IsTrue(f1.IsBinary);
    Assert.IsTrue(!f3.IsBinary);
    Assert.IsTrue(fb.IsBinary);
    AssertSizeEquals(2 * DocHelper.FIELD_1_TEXT.Length, f1.GetBinaryValue());
    Assert.AreEqual(DocHelper.FIELD_3_TEXT, f3.StringValue);
    AssertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.Length, fb.GetBinaryValue());
    reader.Dispose();
}
/// <summary> </summary>
/// <returns> The number of documents in all of the readers </returns>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
private int MergeFields() {
    if (!mergeDocStores) {
        // When we are not merging by doc stores, the segments' field
        // name -> number mappings are the same. So, we start
        // with the fieldInfos of the last segment in this
        // case, to keep that numbering.
        SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
        fieldInfos = (FieldInfos) sr.core.fieldInfos.Clone();
    } else {
        fieldInfos = new FieldInfos(); // merge field names
    }

    foreach (IndexReader reader in readers) {
        if (reader is SegmentReader) {
            SegmentReader segmentReader = (SegmentReader) reader;
            FieldInfos readerFieldInfos = segmentReader.FieldInfos();
            int numReaderFieldInfos = readerFieldInfos.Size();
            for (int j = 0; j < numReaderFieldInfos; j++) {
                FieldInfo fi = readerFieldInfos.FieldInfo(j);
                fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
            }
        } else {
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
            fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
        }
    }
    fieldInfos.Write(directory, segment + ".fnm");

    int docCount = 0;
    SetMatchingSegmentReaders();

    if (mergeDocStores) {
        // merge field values
        FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
        try {
            int idx = 0;
            foreach (IndexReader reader in readers) {
                SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
                FieldsReader matchingFieldsReader = null;
                if (matchingSegmentReader != null) {
                    FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                    if (fieldsReader != null && fieldsReader.CanReadRawDocs()) {
                        matchingFieldsReader = fieldsReader;
                    }
                }
                if (reader.HasDeletions) {
                    docCount += CopyFieldsWithDeletions(fieldsWriter, reader, matchingFieldsReader);
                } else {
                    docCount += CopyFieldsNoDeletions(fieldsWriter, reader, matchingFieldsReader);
                }
            }
        } finally {
            fieldsWriter.Dispose();
        }

        System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
        long fdxFileLength = directory.FileLength(fileName);
        if (4 + ((long) docCount) * 8 != fdxFileLength) {
            // This is most likely a bug in Sun JRE 1.6.0_04/_05;
            // we detect that the bug has struck, here, and
            // throw an exception to prevent the corruption from
            // entering the index. See LUCENE-1282 for
            // details.
            throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
        }
    } else {
        // If we are skipping the doc stores, that means there
        // are no deletions in any of these segments, so we
        // just sum numDocs() of each segment to get total docCount
        foreach (IndexReader reader in readers) {
            docCount += reader.NumDocs();
        }
    }
    return docCount;
}
private void InitBlock(FieldsReader enclosingInstance) { this.Enclosing_Instance = enclosingInstance; }
private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores) {
    segment = si.name;
    this.si = si;
    this.readBufferSize = readBufferSize;

    bool success = false;
    try {
        // Use compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile()) {
            cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            cfsDir = cfsReader;
        }

        Directory storeDir;
        if (doOpenStores) {
            if (si.GetDocStoreOffset() != -1) {
                if (si.GetDocStoreIsCompoundFile()) {
                    storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                    storeDir = storeCFSReader;
                } else {
                    storeDir = Directory();
                }
            } else {
                storeDir = cfsDir;
            }
        } else {
            storeDir = null;
        }

        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

        bool anyProx = false;
        int numFields = fieldInfos.Size();
        for (int i = 0; !anyProx && i < numFields; i++)
            if (!fieldInfos.FieldInfo(i).omitTf)
                anyProx = true;

        System.String fieldsSegment;
        if (si.GetDocStoreOffset() != -1)
            fieldsSegment = si.GetDocStoreSegment();
        else
            fieldsSegment = segment;

        if (doOpenStores) {
            fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

            // Verify two sources of "maxDoc" agree:
            if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount) {
                throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
            }
        }

        tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

        LoadDeletedDocs();

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
        if (anyProx)
            proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
        OpenNorms(cfsDir, readBufferSize);

        if (doOpenStores && fieldInfos.HasVectors()) { // open term vector files only as needed
            System.String vectorsSegment;
            if (si.GetDocStoreOffset() != -1)
                vectorsSegment = si.GetDocStoreSegment();
            else
                vectorsSegment = segment;
            termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
        }

        success = true;
    } finally {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success) {
            DoClose();
        }
    }
}