/// <summary> Merges the readers specified by the {@link #add} method into the directory passed to the constructor</summary> /// <returns> The number of documents that were merged /// </returns> /// <throws> IOException </throws> public /*internal*/ int Merge() { int value_Renamed; value_Renamed = MergeFields(); MergeTerms(); MergeNorms(); if (fieldInfos.HasVectors()) { MergeVectors(); } return(value_Renamed); }
private void Initialize(SegmentInfo si) { segment = si.name; // Use compound file directory for some files, if it exists Directory cfsDir = Directory(); if (Directory().FileExists(segment + ".cfs")) { cfsReader = new CompoundFileReader(Directory(), segment + ".cfs"); cfsDir = cfsReader; } // No compound file exists - use the multi-file format fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos); tis = new TermInfosReader(cfsDir, segment, fieldInfos); // NOTE: the bitvector is stored using the regular directory, not cfs if (HasDeletions(si)) deletedDocs = new BitVector(Directory(), segment + ".del"); // make sure that all index files have been read or are kept open // so that if an index update removes them we'll still have them freqStream = cfsDir.OpenInput(segment + ".frq"); proxStream = cfsDir.OpenInput(segment + ".prx"); OpenNorms(cfsDir); if (fieldInfos.HasVectors()) { // open term vector files only as needed termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos); } }
private void Initialize(SegmentInfo si) { segment = si.name; this.si = si; bool success = false; try { // Use compound file directory for some files, if it exists Directory cfsDir = Directory(); if (si.GetUseCompoundFile()) { cfsReader = new CompoundFileReader(Directory(), segment + ".cfs"); cfsDir = cfsReader; } // No compound file exists - use the multi-file format fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos); // Verify two sources of "maxDoc" agree: if (fieldsReader.Size() != si.docCount) { throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount); } tis = new TermInfosReader(cfsDir, segment, fieldInfos); // NOTE: the bitvector is stored using the regular directory, not cfs if (HasDeletions(si)) { deletedDocs = new BitVector(Directory(), si.GetDelFileName()); // Verify # deletes does not exceed maxDoc for this segment: if (deletedDocs.Count() > MaxDoc()) { throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name); } } // make sure that all index files have been read or are kept open // so that if an index update removes them we'll still have them freqStream = cfsDir.OpenInput(segment + ".frq"); proxStream = cfsDir.OpenInput(segment + ".prx"); OpenNorms(cfsDir); if (fieldInfos.HasVectors()) { // open term vector files only as needed termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos); } success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { DoClose(); } } }
private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores) { segment = si.name; this.si = si; this.readBufferSize = readBufferSize; bool success = false; try { // Use compound file directory for some files, if it exists Directory cfsDir = Directory(); if (si.GetUseCompoundFile()) { cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize); cfsDir = cfsReader; } Directory storeDir; if (doOpenStores) { if (si.GetDocStoreOffset() != -1) { if (si.GetDocStoreIsCompoundFile()) { storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize); storeDir = storeCFSReader; } else { storeDir = Directory(); } } else { storeDir = cfsDir; } } else { storeDir = null; } fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); bool anyProx = false; int numFields = fieldInfos.Size(); for (int i = 0; !anyProx && i < numFields; i++) { if (!fieldInfos.FieldInfo(i).omitTf) { anyProx = true; } } System.String fieldsSegment; if (si.GetDocStoreOffset() != -1) { fieldsSegment = si.GetDocStoreSegment(); } else { fieldsSegment = segment; } if (doOpenStores) { fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount); // Verify two sources of "maxDoc" agree: if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount) { throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount); } } tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize); LoadDeletedDocs(); // make sure that all index files have been read or are kept open // so that if an index update removes them we'll still have them freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize); if (anyProx) { proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize); } OpenNorms(cfsDir, readBufferSize); if (doOpenStores && fieldInfos.HasVectors()) { // open term vector files only as needed System.String vectorsSegment; if (si.GetDocStoreOffset() != -1) { vectorsSegment = si.GetDocStoreSegment(); } else { vectorsSegment = segment; } termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount); } success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { DoClose(); } } }
private void Initialize(SegmentInfo si) { segment = si.name; // Use compound file directory for some files, if it exists Directory cfsDir = Directory(); if (Directory().FileExists(segment + ".cfs")) { cfsReader = new CompoundFileReader(Directory(), segment + ".cfs"); cfsDir = cfsReader; } // No compound file exists - use the multi-file format fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos); tis = new TermInfosReader(cfsDir, segment, fieldInfos); // NOTE: the bitvector is stored using the regular directory, not cfs if (HasDeletions(si)) deletedDocs = new BitVector(Directory(), segment + ".del"); // make sure that all index files have been read or are kept open // so that if an index update removes them we'll still have them freqStream = cfsDir.OpenInput(segment + ".frq"); proxStream = cfsDir.OpenInput(segment + ".prx"); OpenNorms(cfsDir); if (fieldInfos.HasVectors()) { // open term vector files only as needed termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos); } }
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor) { fieldsReaderLocal = new AnonymousFieldsReaderLocal(this); termVectorsLocal = new AnonymousTermVectorsLocal(this); if (termsIndexDivisor == 0) { throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); } Codec codec = si.Info.Codec; Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory. bool success = false; try { if (si.Info.UseCompoundFile) { cfsDir = CfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { CfsReader = null; cfsDir = dir; } FieldInfos fieldInfos = owner.FieldInfos_Renamed; this.TermsIndexDivisor = termsIndexDivisor; PostingsFormat format = codec.PostingsFormat(); SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor); // Ask codec for its Fields Fields = format.FieldsProducer(segmentReadState); Debug.Assert(Fields != null); // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! if (fieldInfos.HasNorms()) { NormsProducer = codec.NormsFormat().NormsProducer(segmentReadState); Debug.Assert(NormsProducer != null); } else { NormsProducer = null; } StoredFieldsFormat sff = si.Info.Codec.StoredFieldsFormat(); try { FieldsReaderOrig = sff.FieldsReader(cfsDir, si.Info, fieldInfos, context); } catch (System.AccessViolationException ave) { } //FieldsReaderOrig = si.Info.Codec.StoredFieldsFormat().FieldsReader(cfsDir, si.Info, fieldInfos, context); if (fieldInfos.HasVectors()) // open term vector files only as needed { TermVectorsReaderOrig = si.Info.Codec.TermVectorsFormat().VectorsReader(cfsDir, si.Info, fieldInfos, context); } else { TermVectorsReaderOrig = null; } success = true; } finally { if (!success) { DecRef(); } } }
private void Initialize(SegmentInfo si) { segment = si.name; this.si = si; bool success = false; try { // Use compound file directory for some files, if it exists Directory cfsDir = Directory(); if (si.GetUseCompoundFile()) { cfsReader = new CompoundFileReader(Directory(), segment + ".cfs"); cfsDir = cfsReader; } // No compound file exists - use the multi-file format fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos); // Verify two sources of "maxDoc" agree: if (fieldsReader.Size() != si.docCount) { throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount); } tis = new TermInfosReader(cfsDir, segment, fieldInfos); // NOTE: the bitvector is stored using the regular directory, not cfs if (HasDeletions(si)) { deletedDocs = new BitVector(Directory(), si.GetDelFileName()); // Verify # deletes does not exceed maxDoc for this segment: if (deletedDocs.Count() > MaxDoc()) { throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name); } } // make sure that all index files have been read or are kept open // so that if an index update removes them we'll still have them freqStream = cfsDir.OpenInput(segment + ".frq"); proxStream = cfsDir.OpenInput(segment + ".prx"); OpenNorms(cfsDir); if (fieldInfos.HasVectors()) { // open term vector files only as needed termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos); } success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { DoClose(); } } }
private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores) { segment = si.name; this.si = si; this.readBufferSize = readBufferSize; bool success = false; try { // Use compound file directory for some files, if it exists Directory cfsDir = Directory(); if (si.GetUseCompoundFile()) { cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize); cfsDir = cfsReader; } Directory storeDir; if (doOpenStores) { if (si.GetDocStoreOffset() != - 1) { if (si.GetDocStoreIsCompoundFile()) { storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize); storeDir = storeCFSReader; } else { storeDir = Directory(); } } else { storeDir = cfsDir; } } else storeDir = null; fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); bool anyProx = false; int numFields = fieldInfos.Size(); for (int i = 0; !anyProx && i < numFields; i++) if (!fieldInfos.FieldInfo(i).omitTf) anyProx = true; System.String fieldsSegment; if (si.GetDocStoreOffset() != - 1) fieldsSegment = si.GetDocStoreSegment(); else fieldsSegment = segment; if (doOpenStores) { fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount); // Verify two sources of "maxDoc" agree: if (si.GetDocStoreOffset() == - 1 && fieldsReader.Size() != si.docCount) { throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount); } } tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize); LoadDeletedDocs(); // make sure that all index files have been read or are kept open // so that if an index update removes them we'll still have them freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize); if (anyProx) proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize); OpenNorms(cfsDir, readBufferSize); if (doOpenStores && fieldInfos.HasVectors()) { // open term vector files only as needed System.String vectorsSegment; if (si.GetDocStoreOffset() != - 1) vectorsSegment = si.GetDocStoreSegment(); else vectorsSegment = segment; termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount); } success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { DoClose(); } } }