/// <summary>
/// Return a term frequency vector for the specified document and field. The
/// vector returned contains term numbers and frequencies for all terms in
/// the specified field of this document, if the field had the storeTermVector
/// flag set. If the flag was not set, the method returns null.
/// </summary>
/// <throws>IOException</throws>
public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
{
    // Check if this field is invalid or has no stored term vector
    EnsureOpen();
    FieldInfo fi = fieldInfos.FieldInfo(field);
    if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
    {
        return null;
    }

    TermVectorsReader termVectorsReader = GetTermVectorsReader();
    if (termVectorsReader == null)
    {
        return null;
    }

    return termVectorsReader.Get(docNumber, field);
}
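// A minimal usage sketch, not from the source: how a caller might consume
// GetTermFreqVector. The reader, docNumber, and field name are assumptions
// for illustration; GetTerms() and GetTermFrequencies() return parallel arrays.
public static void PrintTermFrequencies(IndexReader reader, int docNumber, System.String field)
{
    TermFreqVector vector = reader.GetTermFreqVector(docNumber, field);
    if (vector == null)
    {
        return; // field was not indexed with the storeTermVector flag
    }
    System.String[] terms = vector.GetTerms();
    int[] freqs = vector.GetTermFrequencies();
    for (int i = 0; i < terms.Length; i++)
    {
        System.Console.WriteLine(terms[i] + ": " + freqs[i]);
    }
}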
public virtual void TestReader()
{
    TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos, null);
    Assert.IsTrue(reader != null);
    for (int j = 0; j < 5; j++)
    {
        ITermFreqVector vector = reader.Get(j, testFields[0], null);
        Assert.IsTrue(vector != null);
        System.String[] terms = vector.GetTerms();
        Assert.IsTrue(terms != null);
        Assert.IsTrue(terms.Length == testTerms.Length);
        for (int i = 0; i < terms.Length; i++)
        {
            System.String term = terms[i];
            //System.out.println("Term: " + term);
            Assert.IsTrue(term.Equals(testTerms[i]));
        }
    }
}
public virtual void TestBadParams()
{
    var reader = new TermVectorsReader(dir, seg, fieldInfos);
    Assert.IsTrue(reader != null);
    //Bad document number, good field number
    Assert.Throws<System.IO.IOException>(() => reader.Get(50, testFields[0]));

    reader = new TermVectorsReader(dir, seg, fieldInfos);
    Assert.IsTrue(reader != null);
    //Bad document number, no field
    Assert.Throws<System.IO.IOException>(() => reader.Get(50));

    reader = new TermVectorsReader(dir, seg, fieldInfos);
    Assert.IsTrue(reader != null);
    Assert.DoesNotThrow(() =>
    {
        //good document number, bad field number
        ITermFreqVector vector = reader.Get(0, "f50");
        Assert.IsTrue(vector == null);
    });
}
public virtual void TestBadParams()
{
    try
    {
        TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
        Assert.IsTrue(reader != null);
        //Bad document number, good field number
        reader.Get(50, testFields[0]);
        Assert.Fail();
    }
    catch (System.IO.IOException)
    {
        // expected exception
    }
    try
    {
        TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
        Assert.IsTrue(reader != null);
        //Bad document number, no field
        reader.Get(50);
        Assert.Fail();
    }
    catch (System.IO.IOException)
    {
        // expected exception
    }
    try
    {
        TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
        Assert.IsTrue(reader != null);
        //good document number, bad field number
        TermFreqVector vector = reader.Get(0, "f50");
        Assert.IsTrue(vector == null);
    }
    catch (System.IO.IOException)
    {
        Assert.Fail();
    }
}
public virtual void TestReader()
{
    TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));
    for (int j = 0; j < 5; j++)
    {
        Terms vector = reader.Get(j).GetTerms(testFields[0]);
        Assert.IsNotNull(vector);
        Assert.AreEqual(testTerms.Length, vector.Count);
        TermsEnum termsEnum = vector.GetEnumerator();
        for (int i = 0; i < testTerms.Length; i++)
        {
            Assert.IsTrue(termsEnum.MoveNext());
            BytesRef text = termsEnum.Term;
            string term = text.Utf8ToString();
            //System.out.println("Term: " + term);
            Assert.AreEqual(testTerms[i], term);
        }
        Assert.IsFalse(termsEnum.MoveNext());
    }
    reader.Dispose();
}
public virtual void TestReader()
{
    TermVectorsReader reader = Codec.Default.TermVectorsFormat().VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));
    for (int j = 0; j < 5; j++)
    {
        Terms vector = reader.Get(j).Terms(TestFields[0]);
        Assert.IsNotNull(vector);
        Assert.AreEqual(TestTerms.Length, vector.Size());
        TermsEnum termsEnum = vector.Iterator(null);
        for (int i = 0; i < TestTerms.Length; i++)
        {
            BytesRef text = termsEnum.Next();
            Assert.IsNotNull(text);
            string term = text.Utf8ToString();
            //System.out.println("Term: " + term);
            Assert.AreEqual(TestTerms[i], term);
        }
        Assert.IsNull(termsEnum.Next());
    }
    reader.Dispose();
}
public virtual System.Object Clone()
{
    if (tvx == null || tvd == null || tvf == null)
    {
        return null;
    }

    TermVectorsReader clone = null;
    try
    {
        clone = (TermVectorsReader)base.MemberwiseClone();
    }
    catch (System.Exception)
    {
        // MemberwiseClone does not throw for this type; ignore.
    }

    // Give the clone its own IndexInputs so its file pointers are
    // independent of the original reader's.
    clone.tvx = (IndexInput)tvx.Clone();
    clone.tvd = (IndexInput)tvd.Clone();
    clone.tvf = (IndexInput)tvf.Clone();

    return clone;
}
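// A minimal sketch, assuming an already-opened original reader and a vectored
// field named "body" (both assumptions, not from the source): because Clone
// duplicates the underlying IndexInputs, each thread can pull vectors from its
// own clone without synchronizing file positions with the original reader.
private static TermFreqVector ReadOnPrivateClone(TermVectorsReader original, int docNumber)
{
    var perThread = (TermVectorsReader)original.Clone();
    return perThread.Get(docNumber, "body");
}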
private void Initialize(SegmentInfo si)
{
    segment = si.name;

    // Use compound file directory for some files, if it exists
    Directory cfsDir = Directory();
    if (Directory().FileExists(segment + ".cfs"))
    {
        cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
        cfsDir = cfsReader;
    }

    // No compound file exists - use the multi-file format
    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (HasDeletions(si))
    {
        deletedDocs = new BitVector(Directory(), segment + ".del");
    }

    // make sure that all index files have been read or are kept open
    // so that if an index update removes them we'll still have them
    freqStream = cfsDir.OpenInput(segment + ".frq");
    proxStream = cfsDir.OpenInput(segment + ".prx");
    OpenNorms(cfsDir);

    if (fieldInfos.HasVectors())
    {
        // open term vector files only as needed
        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
    }
}
public override void CheckIntegrity()
{
    EnsureOpen();

    // stored fields
    FieldsReader.CheckIntegrity();

    // term vectors
    TermVectorsReader termVectorsReader = TermVectorsReader;
    if (termVectorsReader != null)
    {
        termVectorsReader.CheckIntegrity();
    }

    // terms/postings
    if (core.fields != null)
    {
        core.fields.CheckIntegrity();
    }

    // norms
    if (core.normsProducer != null)
    {
        core.normsProducer.CheckIntegrity();
    }

    // docvalues
    if (dvProducers != null)
    {
        foreach (DocValuesProducer producer in dvProducers)
        {
            producer.CheckIntegrity();
        }
    }
}
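// A minimal sketch, assuming an already-built index in a Directory named dir
// (an assumption for illustration): CheckIntegrity can be driven per segment
// through the leaves of a DirectoryReader to verify the checksums of stored
// fields, vectors, postings, norms, and doc values without fully loading them.
using (DirectoryReader reader = DirectoryReader.Open(dir))
{
    foreach (AtomicReaderContext leaf in reader.Leaves)
    {
        leaf.AtomicReader.CheckIntegrity();
    }
}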
public virtual void TestReader()
{
    try
    {
        TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
        Assert.IsTrue(reader != null);
        TermFreqVector vector = reader.Get(0, testFields[0]);
        Assert.IsTrue(vector != null);
        System.String[] terms = vector.GetTerms();
        Assert.IsTrue(terms != null);
        Assert.IsTrue(terms.Length == testTerms.Length);
        for (int i = 0; i < terms.Length; i++)
        {
            System.String term = terms[i];
            //System.out.println("Term: " + term);
            Assert.IsTrue(term.Equals(testTerms[i]));
        }
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
public virtual void TestPositionReader()
{
    TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(dir, seg.Info, fieldInfos, NewIOContext(Random));
    //BytesRef[] terms; // LUCENENET NOTE: Not used in Lucene

    Terms vector = reader.Get(0).GetTerms(testFields[0]);
    Assert.IsNotNull(vector);
    Assert.AreEqual(testTerms.Length, vector.Count);
    TermsEnum termsEnum = vector.GetEnumerator();
    DocsAndPositionsEnum dpEnum = null;
    for (int i = 0; i < testTerms.Length; i++)
    {
        Assert.IsTrue(termsEnum.MoveNext());
        BytesRef text = termsEnum.Term;
        string term = text.Utf8ToString();
        //System.out.println("Term: " + term);
        Assert.AreEqual(testTerms[i], term);

        dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
        Assert.IsNotNull(dpEnum);
        int doc = dpEnum.DocID;
        Assert.AreEqual(-1, doc);
        Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        Assert.AreEqual(dpEnum.Freq, positions[i].Length);
        for (int j = 0; j < positions[i].Length; j++)
        {
            Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
        }
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

        dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
        doc = dpEnum.DocID;
        Assert.AreEqual(-1, doc);
        Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        Assert.IsNotNull(dpEnum);
        Assert.AreEqual(dpEnum.Freq, positions[i].Length);
        for (int j = 0; j < positions[i].Length; j++)
        {
            Assert.AreEqual(positions[i][j], dpEnum.NextPosition());
            Assert.AreEqual(j * 10, dpEnum.StartOffset);
            Assert.AreEqual(j * 10 + testTerms[i].Length, dpEnum.EndOffset);
        }
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
    }

    Terms freqVector = reader.Get(0).GetTerms(testFields[1]); //no pos, no offset
    Assert.IsNotNull(freqVector);
    Assert.AreEqual(testTerms.Length, freqVector.Count);
    termsEnum = freqVector.GetEnumerator();
    Assert.IsNotNull(termsEnum);
    for (int i = 0; i < testTerms.Length; i++)
    {
        Assert.IsTrue(termsEnum.MoveNext());
        BytesRef text = termsEnum.Term;
        string term = text.Utf8ToString();
        //System.out.println("Term: " + term);
        Assert.AreEqual(testTerms[i], term);
        Assert.IsNotNull(termsEnum.Docs(null, null));
        Assert.IsNull(termsEnum.DocsAndPositions(null, null)); // no pos
    }
    reader.Dispose();
}
public virtual void TestWriter()
{
    try
    {
        TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
        writer.OpenDocument();
        Assert.IsTrue(writer.IsDocumentOpen() == true);
        WriteField(writer, testFields[0]);
        writer.CloseDocument();
        writer.Close();
        Assert.IsTrue(writer.IsDocumentOpen() == false);

        //Check to see the files were created
        Assert.IsTrue(dir.FileExists(seg + TermVectorsWriter.TVD_EXTENSION));
        Assert.IsTrue(dir.FileExists(seg + TermVectorsWriter.TVX_EXTENSION));

        //Now read it back in
        TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
        Assert.IsTrue(reader != null);
        CheckTermVector(reader, 0, testFields[0]);
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
public virtual void TestMapper()
{
    TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
    Assert.IsTrue(reader != null);
    SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.Get(0, mapper);
    SortedSet<TermVectorEntry> set_Renamed = mapper.GetTermVectorEntrySet();
    Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
    //three fields, 4 terms, all terms are the same
    Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
    //Check offsets and positions
    foreach (TermVectorEntry tve in set_Renamed)
    {
        Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
        Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
        Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
    }

    mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.Get(1, mapper);
    set_Renamed = mapper.GetTermVectorEntrySet();
    Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
    //three fields, 4 terms, all terms are the same
    Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
    //Should have offsets and positions b/c we are munging all the fields together
    foreach (TermVectorEntry tve in set_Renamed)
    {
        Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
        Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
        Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
    }

    FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.Get(0, fsMapper);
    IDictionary<string, SortedSet<TermVectorEntry>> map = fsMapper.GetFieldToTerms();
    Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
    foreach (KeyValuePair<string, SortedSet<TermVectorEntry>> entry in new Dictionary<string, SortedSet<TermVectorEntry>>(map))
    {
        SortedSet<TermVectorEntry> sortedSet = entry.Value;
        Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
        foreach (TermVectorEntry tve in sortedSet)
        {
            Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
            //Check offsets and positions.
            System.String field = tve.GetField();
            if (field.Equals(testFields[0]))
            {
                //should have offsets
                Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
            }
            else if (field.Equals(testFields[1]))
            {
                //should not have offsets
                Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
            }
        }
    }

    //Try mapper that ignores offs and positions
    fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator());
    reader.Get(0, fsMapper);
    map = fsMapper.GetFieldToTerms();
    Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
    foreach (KeyValuePair<string, SortedSet<TermVectorEntry>> entry in new Dictionary<string, SortedSet<TermVectorEntry>>(map))
    {
        SortedSet<TermVectorEntry> sortedSet = entry.Value;
        Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
        foreach (TermVectorEntry tve in sortedSet)
        {
            Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
            //This mapper ignores offsets and positions, so both should be null
            //for every field.
            System.String field = tve.GetField();
            if (field.Equals(testFields[0]))
            {
                Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
            }
            else if (field.Equals(testFields[1]))
            {
                Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
            }
        }
    }

    // test setDocumentNumber()
    IndexReader ir = IndexReader.Open(dir);
    DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();
    Assert.AreEqual(-1, docNumAwareMapper.GetDocumentNumber());

    ir.GetTermFreqVector(0, docNumAwareMapper);
    Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
    docNumAwareMapper.SetDocumentNumber(-1);

    ir.GetTermFreqVector(1, docNumAwareMapper);
    Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
    docNumAwareMapper.SetDocumentNumber(-1);

    ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
    Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
    docNumAwareMapper.SetDocumentNumber(-1);

    ir.GetTermFreqVector(1, "f2", docNumAwareMapper);
    Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
    docNumAwareMapper.SetDocumentNumber(-1);

    ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
    Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());

    ir.Close();
}
public virtual void TestReader()
{
    TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
    Assert.IsTrue(reader != null);
    for (int j = 0; j < 5; j++)
    {
        TermFreqVector vector = reader.Get(j, testFields[0]);
        Assert.IsTrue(vector != null);
        System.String[] terms = vector.GetTerms();
        Assert.IsTrue(terms != null);
        Assert.IsTrue(terms.Length == testTerms.Length);
        for (int i = 0; i < terms.Length; i++)
        {
            System.String term = terms[i];
            //System.out.println("Term: " + term);
            Assert.IsTrue(term.Equals(testTerms[i]));
        }
    }
}
private void CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
{
    int maxDoc = reader.MaxDoc();
    if (matchingVectorsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        int docCount = 0;
        while (docCount < maxDoc)
        {
            int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
            termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
            docCount += len;
            checkAbort.Work(300 * len);
        }
    }
    else
    {
        for (int docNum = 0; docNum < maxDoc; docNum++)
        {
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
            termVectorsWriter.AddAllDocVectors(vectors);
            checkAbort.Work(300);
        }
    }
}
internal void OpenDocStores(SegmentInfo si)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(si.name.Equals(segment));

        if (fieldsReaderOrig == null)
        {
            Directory storeDir;
            if (si.GetDocStoreOffset() != -1)
            {
                if (si.GetDocStoreIsCompoundFile())
                {
                    System.Diagnostics.Debug.Assert(storeCFSReader == null);
                    storeCFSReader = new CompoundFileReader(dir, si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                    storeDir = storeCFSReader;
                    System.Diagnostics.Debug.Assert(storeDir != null);
                }
                else
                {
                    storeDir = dir;
                    System.Diagnostics.Debug.Assert(storeDir != null);
                }
            }
            else if (si.GetUseCompoundFile())
            {
                // In some cases, we were originally opened when CFS
                // was not used, but then we are asked to open doc
                // stores after the segment has switched to CFS
                if (cfsReader == null)
                {
                    cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                }
                storeDir = cfsReader;
                System.Diagnostics.Debug.Assert(storeDir != null);
            }
            else
            {
                storeDir = dir;
                System.Diagnostics.Debug.Assert(storeDir != null);
            }

            System.String storesSegment;
            if (si.GetDocStoreOffset() != -1)
            {
                storesSegment = si.GetDocStoreSegment();
            }
            else
            {
                storesSegment = segment;
            }

            fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

            // Verify two sources of "maxDoc" agree:
            if (si.GetDocStoreOffset() == -1 && fieldsReaderOrig.Size() != si.docCount)
            {
                throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.Size() + " but segmentInfo shows " + si.docCount);
            }

            if (fieldInfos.HasVectors())
            {
                // open term vector files only as needed
                termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
            }
        }
    }
}
public virtual void TestPositionReader()
{
    TermVectorsReader reader = Codec.Default.TermVectorsFormat().VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));
    BytesRef[] terms;
    Terms vector = reader.Get(0).Terms(TestFields[0]);
    Assert.IsNotNull(vector);
    Assert.AreEqual(TestTerms.Length, vector.Size());
    TermsEnum termsEnum = vector.Iterator(null);
    DocsAndPositionsEnum dpEnum = null;
    for (int i = 0; i < TestTerms.Length; i++)
    {
        BytesRef text = termsEnum.Next();
        Assert.IsNotNull(text);
        string term = text.Utf8ToString();
        //System.out.println("Term: " + term);
        Assert.AreEqual(TestTerms[i], term);

        dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
        Assert.IsNotNull(dpEnum);
        int doc = dpEnum.DocID();
        Assert.AreEqual(-1, doc);
        Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        Assert.AreEqual(dpEnum.Freq(), Positions[i].Length);
        for (int j = 0; j < Positions[i].Length; j++)
        {
            Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
        }
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

        dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
        doc = dpEnum.DocID();
        Assert.AreEqual(-1, doc);
        Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        Assert.IsNotNull(dpEnum);
        Assert.AreEqual(dpEnum.Freq(), Positions[i].Length);
        for (int j = 0; j < Positions[i].Length; j++)
        {
            Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
            Assert.AreEqual(j * 10, dpEnum.StartOffset());
            Assert.AreEqual(j * 10 + TestTerms[i].Length, dpEnum.EndOffset());
        }
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
    }

    Terms freqVector = reader.Get(0).Terms(TestFields[1]); //no pos, no offset
    Assert.IsNotNull(freqVector);
    Assert.AreEqual(TestTerms.Length, freqVector.Size());
    termsEnum = freqVector.Iterator(null);
    Assert.IsNotNull(termsEnum);
    for (int i = 0; i < TestTerms.Length; i++)
    {
        BytesRef text = termsEnum.Next();
        Assert.IsNotNull(text);
        string term = text.Utf8ToString();
        //System.out.println("Term: " + term);
        Assert.AreEqual(TestTerms[i], term);
        Assert.IsNotNull(termsEnum.Docs(null, null));
        Assert.IsNull(termsEnum.DocsAndPositions(null, null)); // no pos
    }
    reader.Dispose();
}
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
    termVectorsLocal = new AnonymousTermVectorsLocal(this);

    if (termsIndexDivisor == 0)
    {
        throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory.

    bool success = false;

    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = CfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            CfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos_Renamed;

        this.TermsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat();
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        Fields = format.FieldsProducer(segmentReadState);
        Debug.Assert(Fields != null);

        // ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda jaky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms())
        {
            NormsProducer = codec.NormsFormat().NormsProducer(segmentReadState);
            Debug.Assert(NormsProducer != null);
        }
        else
        {
            NormsProducer = null;
        }

        StoredFieldsFormat sff = si.Info.Codec.StoredFieldsFormat();
        try
        {
            FieldsReaderOrig = sff.FieldsReader(cfsDir, si.Info, fieldInfos, context);
        }
        catch (System.AccessViolationException ave)
        {
        }
        //FieldsReaderOrig = si.Info.Codec.StoredFieldsFormat().FieldsReader(cfsDir, si.Info, fieldInfos, context);

        if (fieldInfos.HasVectors()) // open term vector files only as needed
        {
            TermVectorsReaderOrig = si.Info.Codec.TermVectorsFormat().VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            TermVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }
}
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new DisposableThreadLocal<StoredFieldsReader>(() => (StoredFieldsReader)fieldsReaderOrig.Clone());
    termVectorsLocal = new DisposableThreadLocal<TermVectorsReader>(() => (termVectorsReaderOrig is null) ? null : (TermVectorsReader)termVectorsReaderOrig.Clone());

    if (termsIndexDivisor == 0)
    {
        throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory.

    bool success = false;

    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            cfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos;

        this.termsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat;
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = format.FieldsProducer(segmentReadState);
        if (Debugging.AssertsEnabled) Debugging.Assert(fields != null);

        // ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda jaky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms)
        {
            normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
            if (Debugging.AssertsEnabled) Debugging.Assert(normsProducer != null);
        }
        else
        {
            normsProducer = null;
        }

        fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

        if (fieldInfos.HasVectors) // open term vector files only as needed
        {
            termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            termVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }
}
private void Initialize(SegmentInfo si)
{
    segment = si.name;
    this.si = si;

    bool success = false;

    try
    {
        // Use compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile())
        {
            cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
            cfsDir = cfsReader;
        }

        // No compound file exists - use the multi-file format
        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
        fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

        // Verify two sources of "maxDoc" agree:
        if (fieldsReader.Size() != si.docCount)
        {
            throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
        }

        tis = new TermInfosReader(cfsDir, segment, fieldInfos);

        // NOTE: the bitvector is stored using the regular directory, not cfs
        if (HasDeletions(si))
        {
            deletedDocs = new BitVector(Directory(), si.GetDelFileName());

            // Verify # deletes does not exceed maxDoc for this segment:
            if (deletedDocs.Count() > MaxDoc())
            {
                throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
            }
        }

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq");
        proxStream = cfsDir.OpenInput(segment + ".prx");
        OpenNorms(cfsDir);

        if (fieldInfos.HasVectors())
        {
            // open term vector files only as needed
            termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
        }

        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            DoClose();
        }
    }
}
private void Initialize(SegmentInfo si)
{
    segment = si.name;

    // Use compound file directory for some files, if it exists
    Directory cfsDir = Directory();
    if (Directory().FileExists(segment + ".cfs"))
    {
        cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
        cfsDir = cfsReader;
    }

    // No compound file exists - use the multi-file format
    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (HasDeletions(si))
        deletedDocs = new BitVector(Directory(), segment + ".del");

    // make sure that all index files have been read or are kept open
    // so that if an index update removes them we'll still have them
    freqStream = cfsDir.OpenInput(segment + ".frq");
    proxStream = cfsDir.OpenInput(segment + ".prx");
    OpenNorms(cfsDir);

    if (fieldInfos.HasVectors())
    {
        // open term vector files only as needed
        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
    }
}
private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores)
{
    segment = si.name;
    this.si = si;
    this.readBufferSize = readBufferSize;

    bool success = false;

    try
    {
        // Use compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile())
        {
            cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            cfsDir = cfsReader;
        }

        Directory storeDir;

        if (doOpenStores)
        {
            if (si.GetDocStoreOffset() != -1)
            {
                if (si.GetDocStoreIsCompoundFile())
                {
                    storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                    storeDir = storeCFSReader;
                }
                else
                {
                    storeDir = Directory();
                }
            }
            else
            {
                storeDir = cfsDir;
            }
        }
        else
        {
            storeDir = null;
        }

        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

        bool anyProx = false;
        int numFields = fieldInfos.Size();
        for (int i = 0; !anyProx && i < numFields; i++)
        {
            if (!fieldInfos.FieldInfo(i).omitTf)
            {
                anyProx = true;
            }
        }

        System.String fieldsSegment;
        if (si.GetDocStoreOffset() != -1)
        {
            fieldsSegment = si.GetDocStoreSegment();
        }
        else
        {
            fieldsSegment = segment;
        }

        if (doOpenStores)
        {
            fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

            // Verify two sources of "maxDoc" agree:
            if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount)
            {
                throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
            }
        }

        tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

        LoadDeletedDocs();

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
        if (anyProx)
        {
            proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
        }
        OpenNorms(cfsDir, readBufferSize);

        if (doOpenStores && fieldInfos.HasVectors())
        {
            // open term vector files only as needed
            System.String vectorsSegment;
            if (si.GetDocStoreOffset() != -1)
            {
                vectorsSegment = si.GetDocStoreSegment();
            }
            else
            {
                vectorsSegment = segment;
            }
            termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
        }

        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            DoClose();
        }
    }
}
private void CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
{
    int maxDoc = reader.MaxDoc();
    if (matchingVectorsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        for (int docNum = 0; docNum < maxDoc; )
        {
            if (reader.IsDeleted(docNum))
            {
                // skip deleted docs
                ++docNum;
                continue;
            }
            // We can optimize this case (doing a bulk byte copy) since the field
            // numbers are identical
            int start = docNum, numDocs = 0;
            do
            {
                docNum++;
                numDocs++;
                if (docNum >= maxDoc)
                    break;
                if (reader.IsDeleted(docNum))
                {
                    docNum++;
                    break;
                }
            }
            while (numDocs < MAX_RAW_MERGE_DOCS);

            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
            termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
            checkAbort.Work(300 * numDocs);
        }
    }
    else
    {
        for (int docNum = 0; docNum < maxDoc; docNum++)
        {
            if (reader.IsDeleted(docNum))
            {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
            termVectorsWriter.AddAllDocVectors(vectors);
            checkAbort.Work(300);
        }
    }
}
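// A worked illustration of the run detection above (the doc layout is
// assumed): with maxDoc = 6 and deletions at docs 1 and 4, the loop issues
// three bulk copies, [0..0], [2..3], and [5..5], instead of six per-document
// copies. A standalone sketch of the same run-splitting idea, using a plain
// bool[] in place of the deleted-docs bitvector (requires
// System.Collections.Generic):
static List<(int Start, int Count)> LiveRuns(bool[] deleted, int maxRun)
{
    var runs = new List<(int, int)>();
    for (int doc = 0; doc < deleted.Length; )
    {
        if (deleted[doc]) { doc++; continue; } // skip deleted docs
        int start = doc, count = 0;
        // extend the run while docs stay live and the batch cap isn't hit
        while (doc < deleted.Length && !deleted[doc] && count < maxRun)
        {
            doc++;
            count++;
        }
        runs.Add((start, count));
    }
    return runs;
}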
public virtual void TestMapper()
{
    TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
    Assert.IsTrue(reader != null);
    SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.Get(0, mapper);
    var set_Renamed = mapper.TermVectorEntrySet;
    Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
    //three fields, 4 terms, all terms are the same
    Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
    //Check offsets and positions
    for (System.Collections.IEnumerator iterator = set_Renamed.GetEnumerator(); iterator.MoveNext();)
    {
        TermVectorEntry tve = (TermVectorEntry)iterator.Current;
        Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
        Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
        Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
    }

    mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.Get(1, mapper);
    set_Renamed = mapper.TermVectorEntrySet;
    Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be");
    //three fields, 4 terms, all terms are the same
    Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4);
    //Should have offsets and positions b/c we are munging all the fields together
    for (System.Collections.IEnumerator iterator = set_Renamed.GetEnumerator(); iterator.MoveNext();)
    {
        TermVectorEntry tve = (TermVectorEntry)iterator.Current;
        Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
        Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
        Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
    }

    FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.Get(0, fsMapper);
    var map = fsMapper.FieldToTerms;
    Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
    for (var iterator = map.GetEnumerator(); iterator.MoveNext();)
    {
        var entry = iterator.Current;
        var sortedSet = entry.Value;
        Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
        for (var inner = sortedSet.GetEnumerator(); inner.MoveNext();)
        {
            TermVectorEntry tve = inner.Current;
            Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
            //Check offsets and positions.
            System.String field = tve.Field;
            if (field.Equals(testFields[0]))
            {
                //should have offsets
                Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be");
            }
            else if (field.Equals(testFields[1]))
            {
                //should not have offsets
                Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
            }
        }
    }

    //Try mapper that ignores offs and positions
    fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator());
    reader.Get(0, fsMapper);
    map = fsMapper.FieldToTerms;
    Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length);
    for (var iterator = map.GetEnumerator(); iterator.MoveNext();)
    {
        var entry = iterator.Current;
        var sortedSet = entry.Value;
        Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4);
        for (var inner = sortedSet.GetEnumerator(); inner.MoveNext();)
        {
            TermVectorEntry tve = inner.Current;
            Assert.IsTrue(tve != null, "tve is null and it shouldn't be");
            //This mapper ignores offsets and positions, so both should be null
            //for every field.
            System.String field = tve.Field;
            if (field.Equals(testFields[0]))
            {
                Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
            }
            else if (field.Equals(testFields[1]))
            {
                Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be");
                Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be");
            }
        }
    }

    // test setDocumentNumber()
    IndexReader ir = IndexReader.Open(dir, true);
    DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();
    Assert.AreEqual(-1, docNumAwareMapper.GetDocumentNumber());

    ir.GetTermFreqVector(0, docNumAwareMapper);
    Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
    docNumAwareMapper.SetDocumentNumber(-1);

    ir.GetTermFreqVector(1, docNumAwareMapper);
    Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
    docNumAwareMapper.SetDocumentNumber(-1);

    ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
    Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());
    docNumAwareMapper.SetDocumentNumber(-1);

    ir.GetTermFreqVector(1, "f2", docNumAwareMapper);
    Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber());
    docNumAwareMapper.SetDocumentNumber(-1);

    ir.GetTermFreqVector(0, "f1", docNumAwareMapper);
    Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber());

    ir.Close();
}
/// <summary>
/// Do a bulk copy of numDocs documents from reader to our
/// streams. This is used to expedite merging, if the
/// field numbers are congruent.
/// </summary>
internal void AddRawDocuments(TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
{
    long tvdPosition = tvd.GetFilePointer();
    long tvfPosition = tvf.GetFilePointer();
    long tvdStart = tvdPosition;
    long tvfStart = tvfPosition;
    for (int i = 0; i < numDocs; i++)
    {
        tvx.WriteLong(tvdPosition);
        tvdPosition += tvdLengths[i];
        tvx.WriteLong(tvfPosition);
        tvfPosition += tvfLengths[i];
    }
    tvd.CopyBytes(reader.GetTvdStream(), tvdPosition - tvdStart);
    tvf.CopyBytes(reader.GetTvfStream(), tvfPosition - tvfStart);
    System.Diagnostics.Debug.Assert(tvd.GetFilePointer() == tvdPosition);
    System.Diagnostics.Debug.Assert(tvf.GetFilePointer() == tvfPosition);
}
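// A minimal sketch of how a merge pairs RawDocs with AddRawDocuments (the
// reader/writer pair, docStart, and numDocs are assumptions for illustration):
// the matching reader fills the two arrays with per-document byte counts for
// the tvd and tvf streams, and the writer then raw-copies exactly that many
// bytes while rebuilding the tvx index entries.
int[] tvdLengths = new int[numDocs];
int[] tvfLengths = new int[numDocs];
matchingVectorsReader.RawDocs(tvdLengths, tvfLengths, docStart, numDocs);
termVectorsWriter.AddRawDocuments(matchingVectorsReader, tvdLengths, tvfLengths, numDocs);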
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
    termVectorsLocal = new AnonymousTermVectorsLocal(this);

    if (termsIndexDivisor == 0)
    {
        throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory.

    bool success = false;

    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            cfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos;

        this.termsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat;
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = format.FieldsProducer(segmentReadState);
        Debug.Assert(fields != null);

        // ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda jaky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms)
        {
            normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
            Debug.Assert(normsProducer != null);
        }
        else
        {
            normsProducer = null;
        }

        // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException,
        // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could
        // this be needed because we are using unchecked??
#if !NETSTANDARD
        try
        {
#endif
            fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);
#if !NETSTANDARD
        }
#pragma warning disable 168
        catch (System.AccessViolationException ave)
#pragma warning restore 168
        {
        }
#endif

        if (fieldInfos.HasVectors) // open term vector files only as needed
        {
            termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            termVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }
}
public virtual void TestOffsetReader()
{
    TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
    Assert.IsTrue(reader != null);
    TermPositionVector vector = (TermPositionVector)reader.Get(0, testFields[0]);
    Assert.IsTrue(vector != null);
    System.String[] terms = vector.GetTerms();
    Assert.IsTrue(terms != null);
    Assert.IsTrue(terms.Length == testTerms.Length);
    for (int i = 0; i < terms.Length; i++)
    {
        System.String term = terms[i];
        //System.out.println("Term: " + term);
        Assert.IsTrue(term.Equals(testTerms[i]));

        int[] positions = vector.GetTermPositions(i);
        Assert.IsTrue(positions != null);
        Assert.IsTrue(positions.Length == this.positions[i].Length);
        for (int j = 0; j < positions.Length; j++)
        {
            int position = positions[j];
            Assert.IsTrue(position == this.positions[i][j]);
        }

        TermVectorOffsetInfo[] offset = vector.GetOffsets(i);
        Assert.IsTrue(offset != null);
        Assert.IsTrue(offset.Length == this.offsets[i].Length);
        for (int j = 0; j < offset.Length; j++)
        {
            TermVectorOffsetInfo termVectorOffsetInfo = offset[j];
            Assert.IsTrue(termVectorOffsetInfo.Equals(offsets[i][j]));
        }
    }
}
private void CheckTermVector(TermVectorsReader reader, int docNum, System.String field)
{
    TermFreqVector vector = reader.Get(docNum, field);
    Assert.IsTrue(vector != null);
    System.String[] terms = vector.GetTerms();
    Assert.IsTrue(terms != null);
    Assert.IsTrue(terms.Length == testTerms.Length);
    for (int i = 0; i < terms.Length; i++)
    {
        System.String term = terms[i];
        Assert.IsTrue(term.Equals(testTerms[i]));
    }
}
public virtual void TestMultipleDocuments()
{
    try
    {
        TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
        Assert.IsTrue(writer != null);
        for (int i = 0; i < 10; i++)
        {
            WriteDocument(writer, testFields.Length);
        }
        writer.Close();
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }

    //Do some arbitrary tests
    try
    {
        TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
        for (int i = 0; i < 10; i++)
        {
            Assert.IsTrue(reader != null);
            CheckTermVector(reader, 5, testFields[0]);
            CheckTermVector(reader, 2, testFields[2]);
        }
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores)
{
    segment = si.name;
    this.si = si;
    this.readBufferSize = readBufferSize;

    bool success = false;

    try
    {
        // Use compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile())
        {
            cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            cfsDir = cfsReader;
        }

        Directory storeDir;

        if (doOpenStores)
        {
            if (si.GetDocStoreOffset() != -1)
            {
                if (si.GetDocStoreIsCompoundFile())
                {
                    storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                    storeDir = storeCFSReader;
                }
                else
                {
                    storeDir = Directory();
                }
            }
            else
            {
                storeDir = cfsDir;
            }
        }
        else
            storeDir = null;

        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

        bool anyProx = false;
        int numFields = fieldInfos.Size();
        for (int i = 0; !anyProx && i < numFields; i++)
            if (!fieldInfos.FieldInfo(i).omitTf)
                anyProx = true;

        System.String fieldsSegment;
        if (si.GetDocStoreOffset() != -1)
            fieldsSegment = si.GetDocStoreSegment();
        else
            fieldsSegment = segment;

        if (doOpenStores)
        {
            fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

            // Verify two sources of "maxDoc" agree:
            if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount)
            {
                throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
            }
        }

        tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

        LoadDeletedDocs();

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
        if (anyProx)
            proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
        OpenNorms(cfsDir, readBufferSize);

        if (doOpenStores && fieldInfos.HasVectors())
        {
            // open term vector files only as needed
            System.String vectorsSegment;
            if (si.GetDocStoreOffset() != -1)
                vectorsSegment = si.GetDocStoreSegment();
            else
                vectorsSegment = segment;
            termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
        }

        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            DoClose();
        }
    }
}