NOTE: This API is new and still experimental (subject to change suddenly in the next release)
public override void SetUp()
{
    base.SetUp();
    SetUpInternal();
    DocHelper.SetupDoc(testDoc);
    info = DocHelper.WriteDoc(dir, testDoc);
}
/// <summary>
/// Creates a <see cref="SegmentReadState"/>.
/// </summary>
public SegmentReadState(Directory dir, SegmentInfo info, FieldInfos fieldInfos, IOContext context, int termsIndexDivisor, string segmentSuffix)
{
    this.Directory = dir;
    this.SegmentInfo = info;
    this.FieldInfos = fieldInfos;
    this.Context = context;
    this.TermsIndexDivisor = termsIndexDivisor;
    this.SegmentSuffix = segmentSuffix;
}
/// <summary>
/// Sole constructor.
/// </summary>
public CompressingTermVectorsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
{
    this.compressionMode = compressionMode;
    string segment = si.Name;
    bool success = false;
    fieldInfos = fn;
    numDocs = si.DocCount;
    ChecksumIndexInput indexStream = null;
    try
    {
        // Load the index into memory
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION);
        indexStream = d.OpenChecksumInput(indexStreamFN, context);
        string codecNameIdx = formatName + CompressingTermVectorsWriter.CODEC_SFX_IDX;
        version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);
        indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

        if (version >= CompressingTermVectorsWriter.VERSION_CHECKSUM)
        {
            indexStream.ReadVLong(); // the end of the data file
            CodecUtil.CheckFooter(indexStream);
        }
        else
        {
            CodecUtil.CheckEOF(indexStream);
        }
        indexStream.Dispose();
        indexStream = null;

        // Open the data file and read metadata
        string vectorsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_EXTENSION);
        vectorsStream = d.OpenInput(vectorsStreamFN, context);
        string codecNameDat = formatName + CompressingTermVectorsWriter.CODEC_SFX_DAT;
        int version2 = CodecUtil.CheckHeader(vectorsStream, codecNameDat, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
        if (version != version2)
        {
            throw new Exception("Version mismatch between stored fields index and data: " + version + " != " + version2);
        }
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.FilePointer);

        packedIntsVersion = vectorsStream.ReadVInt();
        chunkSize = vectorsStream.ReadVInt();
        decompressor = compressionMode.NewDecompressor();
        this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0);

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(this, indexStream);
        }
    }
}
private int NumBufferedDocs; // docBase + numBufferedDocs == current doc ID

/// <summary>
/// Sole constructor.
/// </summary>
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
{
    Debug.Assert(directory != null);
    this.Directory = directory;
    this.Segment = si.Name;
    this.SegmentSuffix = segmentSuffix;
    this.CompressionMode = compressionMode;
    this.Compressor = compressionMode.NewCompressor();
    this.ChunkSize = chunkSize;
    this.DocBase = 0;
    this.BufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
    this.NumStoredFields = new int[16];
    this.EndOffsets = new int[16];
    this.NumBufferedDocs = 0;

    bool success = false;
    IndexOutput indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION), context);
    try
    {
        FieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);

        string codecNameIdx = formatName + CODEC_SFX_IDX;
        string codecNameDat = formatName + CODEC_SFX_DAT;
        CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
        CodecUtil.WriteHeader(FieldsStream, codecNameDat, VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == FieldsStream.FilePointer);
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);

        IndexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
        indexStream = null;

        FieldsStream.WriteVInt(chunkSize);
        FieldsStream.WriteVInt(PackedInts.VERSION_CURRENT);

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(indexStream);
            Abort();
        }
    }
}
// Note: just like in the codec APIs, Directory 'dir' is NOT the same as segmentInfo.dir!
public SegmentMerger(IList<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, bool validate)
{
    // Validate the incoming readers
    if (validate)
    {
        foreach (AtomicReader reader in readers)
        {
            reader.CheckIntegrity();
        }
    }
    MergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
    Directory = dir;
    this.TermIndexInterval = termIndexInterval;
    this.Codec = segmentInfo.Codec;
    this.Context = context;
    this.FieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
    MergeState.SegmentInfo.DocCount = SetDocMaps();
}
public void Read(Directory directory)
{
    IndexInput input = directory.OpenInput(IndexFileNames.SEGMENTS);
    try
    {
        int format = input.ReadInt();
        if (format < 0)
        {
            // File contains explicit format info; check that it is a format we can understand
            if (format < FORMAT)
                throw new System.IO.IOException("Unknown format version: " + format);
            version = input.ReadLong(); // read version
            counter = input.ReadInt(); // read counter
        }
        else
        {
            // File is in the old format, without explicit format info
            counter = format;
        }

        for (int i = input.ReadInt(); i > 0; i--)
        {
            // read segmentInfos
            SegmentInfo si = new SegmentInfo(input.ReadString(), input.ReadInt(), directory);
            Add(si);
        }

        if (format >= 0)
        {
            // In the old format the version number may be at the end of the file
            if (input.GetFilePointer() >= input.Length())
            {
                // Old file format without a version number: stamp with the current time.
                // (621355968000000000 ticks == 1970-01-01, so this yields milliseconds
                // since the Unix epoch.)
                version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
            }
            else
            {
                version = input.ReadLong(); // read version
            }
        }
    }
    finally
    {
        input.Close();
    }
}
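// To make the branching above concrete: the first int32 of a legacy segments file
// does double duty, acting either as a (negative) format marker or, in pre-format
// files, as the counter itself. A minimal sketch of that dispatch follows; the
// FORMAT value and the helper name are hypothetical (the real constant lives in
// SegmentInfos), so treat this as an illustration, not library code.
static class SegmentsFormatDemo
{
    // Hypothetical stand-in for SegmentInfos.FORMAT (the oldest explicit format marker).
    private const int FORMAT = -1;

    // Returns true if 'first' announces an explicit format version, false if the
    // file predates format markers and 'first' is the segment-name counter itself.
    public static bool HasExplicitFormat(int first)
    {
        if (first < 0)
        {
            // Format constants grow more negative over time, so anything below
            // FORMAT is a newer format than this reader understands.
            if (first < FORMAT)
            {
                throw new System.IO.IOException("Unknown format version: " + first);
            }
            return true; // a version long and a counter int follow in the stream
        }
        return false; // pre-format file: 'first' is the counter
    }
}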
/// <summary> Copy everything from src SegmentInfo into our instance.</summary>
internal void Reset(SegmentInfo src)
{
    name = src.name;
    docCount = src.docCount;
    dir = src.dir;
    preLockless = src.preLockless;
    delGen = src.delGen;
    if (src.normGen == null)
    {
        normGen = null;
    }
    else
    {
        normGen = new long[src.normGen.Length];
        Array.Copy(src.normGen, 0, normGen, 0, src.normGen.Length);
    }
    isCompoundFile = src.isCompoundFile;
    hasSingleNormFile = src.hasSingleNormFile;
}
// public static boolean DEBUG = false;

/// <summary>
/// Sole constructor.
/// </summary>
public Lucene41PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, string segmentSuffix)
{
    bool success = false;
    IndexInput docIn = null;
    IndexInput posIn = null;
    IndexInput payIn = null;
    try
    {
        docIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), ioContext);
        Version = CodecUtil.CheckHeader(docIn, Lucene41PostingsWriter.DOC_CODEC, Lucene41PostingsWriter.VERSION_START, Lucene41PostingsWriter.VERSION_CURRENT);
        forUtil = new ForUtil(docIn);

        if (fieldInfos.HasProx())
        {
            posIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), ioContext);
            CodecUtil.CheckHeader(posIn, Lucene41PostingsWriter.POS_CODEC, Version, Version);

            if (fieldInfos.HasPayloads() || fieldInfos.HasOffsets())
            {
                payIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), ioContext);
                CodecUtil.CheckHeader(payIn, Lucene41PostingsWriter.PAY_CODEC, Version, Version);
            }
        }

        this.DocIn = docIn;
        this.PosIn = posIn;
        this.PayIn = payIn;
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(docIn, posIn, payIn);
        }
    }
}
/// <summary>
/// Sole constructor.
/// </summary>
/// <param name="info"> <seealso cref="SegmentInfo"/> that we wrap </param>
/// <param name="delCount"> number of deleted documents in this segment </param>
/// <param name="delGen"> deletion generation number (used to name deletion files) </param>
/// <param name="fieldInfosGen"> FieldInfos generation number (used to name field-infos files) </param>
public SegmentCommitInfo(SegmentInfo info, int delCount, long delGen, long fieldInfosGen)
{
    this.Info = info;
    this.DelCount_Renamed = delCount;
    this.DelGen_Renamed = delGen;
    // A generation of -1 means no file of this kind has been written yet,
    // so the first write uses generation 1; otherwise advance by one.
    NextWriteDelGen = delGen == -1 ? 1 : delGen + 1;
    this.FieldInfosGen_Renamed = fieldInfosGen;
    NextWriteFieldInfosGen = fieldInfosGen == -1 ? 1 : fieldInfosGen + 1;
}
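// The generation bookkeeping above follows a single rule, shown here as a small
// standalone sketch (a hypothetical helper, not part of SegmentCommitInfo):
// -1 means "never written", so the first write gets generation 1; any existing
// generation N advances to N + 1 on the next write.
static long NextWriteGen(long currentGen)
{
    // -1 == no deletion/field-infos file has been written for this segment yet
    return currentGen == -1 ? 1 : currentGen + 1;
}

// NextWriteGen(-1) == 1   first file for this segment
// NextWriteGen(5)  == 6   advance an existing generation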
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public static SegmentReader Get(bool readOnly, SegmentInfo si, int termInfosIndexDivisor)
{
    return Get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor);
}

internal static bool HasDeletions(SegmentInfo si)
{
    return si.dir.FileExists(si.name + ".del");
}

/// <summary>Returns true if this single info is optimized (has no
/// pending norms or deletes, is in the same dir as the
/// writer, and matches the current compound file setting).
/// </summary>
private bool IsOptimized(IndexWriter writer, SegmentInfo info)
{
    return !info.HasDeletions()
        && !info.HasSeparateNorms()
        && info.dir == writer.GetDirectory()
        && info.GetUseCompoundFile() == useCompoundFile;
}
/// <summary>Checks if any merges are now necessary and returns a
/// <see cref="MergePolicy.MergeSpecification"/> if so. A merge
/// is necessary when there are more than <see cref="SetMergeFactor"/>
/// segments at a given level. When multiple levels have too many
/// segments, this method will return multiple merges, allowing the
/// <see cref="MergeScheduler"/> to use concurrency.
/// </summary>
public override MergeSpecification FindMerges(SegmentInfos infos)
{
    int numSegments = infos.Count;
    if (Verbose())
    {
        Message("findMerges: " + numSegments + " segments");
    }

    // Compute levels, which is just log (base mergeFactor)
    // of the size of each segment
    float[] levels = new float[numSegments];
    float norm = (float)System.Math.Log(mergeFactor);

    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = infos.Info(i);
        long size = Size(info);

        // Floor tiny segments
        if (size < 1)
        {
            size = 1;
        }
        levels[i] = (float)System.Math.Log(size) / norm;
    }

    float levelFloor;
    if (minMergeSize <= 0)
    {
        levelFloor = 0.0F;
    }
    else
    {
        levelFloor = (float)(System.Math.Log(minMergeSize) / norm);
    }

    // Now, we quantize the log values into levels. The
    // first level is any segment whose log size is within
    // LEVEL_LOG_SPAN of the max size, or that has such a
    // segment "to the right". Then, we find the max of all
    // other segments and use that to define the next level
    // segment, etc.

    MergeSpecification spec = null;

    int start = 0;
    while (start < numSegments)
    {
        // Find max level of all segments not already quantized.
        float maxLevel = levels[start];
        for (int i = 1 + start; i < numSegments; i++)
        {
            float level = levels[i];
            if (level > maxLevel)
            {
                maxLevel = level;
            }
        }

        // Now search backwards for the rightmost segment that
        // falls into this level:
        float levelBottom;
        if (maxLevel < levelFloor)
        {
            // All remaining segments fall into the min level
            levelBottom = -1.0F;
        }
        else
        {
            levelBottom = (float)(maxLevel - LEVEL_LOG_SPAN);

            // Force a boundary at the level floor
            if (levelBottom < levelFloor && maxLevel >= levelFloor)
            {
                levelBottom = levelFloor;
            }
        }

        int upto = numSegments - 1;
        while (upto >= start)
        {
            if (levels[upto] >= levelBottom)
            {
                break;
            }
            upto--;
        }

        if (Verbose())
        {
            Message("  level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
        }

        // Finally, record all merges that are viable at this level:
        int end = start + mergeFactor;
        while (end <= 1 + upto)
        {
            bool anyTooLarge = false;
            for (int i = start; i < end; i++)
            {
                SegmentInfo info = infos.Info(i);
                anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
            }

            if (!anyTooLarge)
            {
                if (spec == null)
                {
                    spec = new MergeSpecification();
                }
                if (Verbose())
                {
                    Message("  " + start + " to " + end + ": add this merge");
                }
                spec.Add(MakeOneMerge(infos, infos.Range(start, end)));
            }
            else if (Verbose())
            {
                Message("  " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
            }

            start = end;
            end = start + mergeFactor;
        }

        start = 1 + upto;
    }

    return spec;
}
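// For intuition about the level math, here is a standalone sketch (not library
// code) that evaluates the same formula, log base mergeFactor of the segment
// size, for a few sizes. With mergeFactor = 10, segments of 1000, 950, and 800
// docs all land within LEVEL_LOG_SPAN of level 3 and would be grouped together,
// while a 10000-doc segment sits a full level higher.
using System;

class LevelDemo
{
    static void Main()
    {
        int mergeFactor = 10;
        float norm = (float)Math.Log(mergeFactor);

        foreach (long size in new long[] { 10000, 1000, 950, 800, 12 })
        {
            // Same formula as FindMerges: log base mergeFactor of the (floored) size.
            float level = (float)Math.Log(Math.Max(size, 1L)) / norm;
            Console.WriteLine($"size={size,6}  level={level:F2}");
        }
        // size= 10000  level=4.00
        // size=  1000  level=3.00
        // size=   950  level=2.98
        // size=   800  level=2.90
        // size=    12  level=1.08
    }
}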
public static SegmentReader Get(SegmentInfo si)
{
    return Get(si.dir, si, null, false, false);
}

protected internal override long Size(SegmentInfo info)
{
    return SizeDocs(info);
}

public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
{
    return Get(si.dir, si, sis, closeDir, true);
}
internal virtual SegmentReader ReopenSegment(SegmentInfo si)
{
    lock (this)
    {
        bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions())
            && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));

        bool normsUpToDate = true;
        bool[] fieldNormsChanged = new bool[fieldInfos.Size()];
        if (normsUpToDate)
        {
            for (int i = 0; i < fieldInfos.Size(); i++)
            {
                if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i)))
                {
                    normsUpToDate = false;
                    fieldNormsChanged[i] = true;
                }
            }
        }

        if (normsUpToDate && deletionsUpToDate)
        {
            return this;
        }

        // Clone the reader
        SegmentReader clone = new SegmentReader();
        bool success = false;
        try
        {
            clone.directory = directory;
            clone.si = si;
            clone.segment = segment;
            clone.readBufferSize = readBufferSize;
            clone.cfsReader = cfsReader;
            clone.storeCFSReader = storeCFSReader;
            clone.fieldInfos = fieldInfos;
            clone.tis = tis;
            clone.freqStream = freqStream;
            clone.proxStream = proxStream;
            clone.termVectorsReaderOrig = termVectorsReaderOrig;

            // We have to open a new FieldsReader, because it is not thread-safe
            // and can thus not be shared among multiple SegmentReaders.
            // TODO: Change this in case FieldsReader becomes thread-safe in the future
            System.String fieldsSegment;
            Directory storeDir = Directory();

            if (si.GetDocStoreOffset() != -1)
            {
                fieldsSegment = si.GetDocStoreSegment();
                if (storeCFSReader != null)
                {
                    storeDir = storeCFSReader;
                }
            }
            else
            {
                fieldsSegment = segment;
                if (cfsReader != null)
                {
                    storeDir = cfsReader;
                }
            }

            if (fieldsReader != null)
            {
                clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
            }

            if (!deletionsUpToDate)
            {
                // Load deleted docs
                clone.deletedDocs = null;
                clone.LoadDeletedDocs();
            }
            else
            {
                clone.deletedDocs = this.deletedDocs;
            }

            clone.norms = new System.Collections.Hashtable();
            if (!normsUpToDate)
            {
                // Load norms
                for (int i = 0; i < fieldNormsChanged.Length; i++)
                {
                    // Copy unchanged norms to the cloned reader and incRef those norms
                    if (!fieldNormsChanged[i])
                    {
                        System.String curField = fieldInfos.FieldInfo(i).name;
                        Norm norm = (Norm)this.norms[curField];
                        norm.IncRef();
                        clone.norms[curField] = norm;
                    }
                }

                clone.OpenNorms(si.GetUseCompoundFile() ? cfsReader : Directory(), readBufferSize);
            }
            else
            {
                System.Collections.IEnumerator it = norms.Keys.GetEnumerator();
                while (it.MoveNext())
                {
                    System.String field = (System.String)it.Current;
                    Norm norm = (Norm)norms[field];
                    norm.IncRef();
                    clone.norms[field] = norm;
                }
            }

            if (clone.singleNormStream == null)
            {
                for (int i = 0; i < fieldInfos.Size(); i++)
                {
                    FieldInfo fi = fieldInfos.FieldInfo(i);
                    if (fi.isIndexed && !fi.omitNorms)
                    {
                        Directory d = si.GetUseCompoundFile() ? cfsReader : Directory();
                        System.String fileName = si.GetNormFileName(fi.number);
                        if (si.HasSeparateNorms(fi.number))
                        {
                            continue;
                        }
                        if (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
                        {
                            clone.singleNormStream = d.OpenInput(fileName, readBufferSize);
                            break;
                        }
                    }
                }
            }

            success = true;
        }
        finally
        {
            if (this.referencedSegmentReader != null)
            {
                // This reader shares resources with another SegmentReader,
                // so we increment the other reader's refCount. We don't
                // increment the refCount of the norms because we did
                // that already for the shared norms.
                clone.referencedSegmentReader = this.referencedSegmentReader;
                referencedSegmentReader.IncRefReaderNotNorms();
            }
            else
            {
                // This reader wasn't reopened, so we increment this
                // reader's refCount.
                clone.referencedSegmentReader = this;
                IncRefReaderNotNorms();
            }

            if (!success)
            {
                // An exception occurred during reopen; we have to decRef the norms
                // that we incRef'ed already and close singleNormStream and FieldsReader
                clone.DecRef();
            }
        }

        return clone;
    }
}
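// The reopen logic above leans on manual reference counting: anything shared with
// the clone is IncRef'ed as it is handed over, and on failure the clone's DecRef
// releases exactly the references it had taken. Reduced to a minimal illustrative
// sketch (not the actual Norm/SegmentReader implementation):
class RefCounted
{
    private int refCount = 1; // the creator holds the first reference

    public void IncRef()
    {
        System.Threading.Interlocked.Increment(ref refCount);
    }

    public void DecRef()
    {
        if (System.Threading.Interlocked.Decrement(ref refCount) == 0)
        {
            // Last holder: this is where underlying streams/files would be closed.
        }
    }
}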
private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores)
{
    segment = si.name;
    this.si = si;
    this.readBufferSize = readBufferSize;

    bool success = false;
    try
    {
        // Use the compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile())
        {
            cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            cfsDir = cfsReader;
        }

        Directory storeDir;
        if (doOpenStores)
        {
            if (si.GetDocStoreOffset() != -1)
            {
                if (si.GetDocStoreIsCompoundFile())
                {
                    storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                    storeDir = storeCFSReader;
                }
                else
                {
                    storeDir = Directory();
                }
            }
            else
            {
                storeDir = cfsDir;
            }
        }
        else
        {
            storeDir = null;
        }

        // Read the field infos (from the compound dir if one is in use)
        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

        System.String fieldsSegment;
        if (si.GetDocStoreOffset() != -1)
        {
            fieldsSegment = si.GetDocStoreSegment();
        }
        else
        {
            fieldsSegment = segment;
        }

        if (doOpenStores)
        {
            fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

            // Verify two sources of "maxDoc" agree:
            if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount)
            {
                throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
            }
        }

        tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

        LoadDeletedDocs();

        // Make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
        proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
        OpenNorms(cfsDir, readBufferSize);

        if (doOpenStores && fieldInfos.HasVectors())
        {
            // Open term vector files only as needed
            System.String vectorsSegment;
            if (si.GetDocStoreOffset() != -1)
            {
                vectorsSegment = si.GetDocStoreSegment();
            }
            else
            {
                vectorsSegment = segment;
            }
            termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
        }

        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            DoClose();
        }
    }
}
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir, int readBufferSize)
{
    return Get(dir, si, sis, closeDir, ownDir, readBufferSize, true);
}

/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
{
    return Get(si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE, true);
}

/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
internal static SegmentReader Get(SegmentInfo si, int readBufferSize, bool doOpenStores)
{
    return Get(si.dir, si, null, false, false, readBufferSize, doOpenStores);
}

/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public static SegmentReader Get(SegmentInfo si, int readBufferSize)
{
    return Get(si.dir, si, null, false, false, readBufferSize, true);
}

/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
internal static SegmentReader Get(SegmentInfo si, bool doOpenStores)
{
    return Get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, doOpenStores);
}
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public static SegmentReader Get(bool readOnly, Directory dir, SegmentInfo si, int readBufferSize, bool doOpenStores, int termInfosIndexDivisor)
{
    SegmentReader instance;
    try
    {
        if (readOnly)
            instance = (SegmentReader)System.Activator.CreateInstance(READONLY_IMPL);
        else
            instance = (SegmentReader)System.Activator.CreateInstance(IMPL);
    }
    catch (System.Exception e)
    {
        throw new System.SystemException("cannot load SegmentReader class: " + e, e);
    }

    instance.readOnly = readOnly;
    instance.si = si;
    instance.readBufferSize = readBufferSize;

    bool success = false;
    try
    {
        instance.core = new CoreReaders(dir, si, readBufferSize, termInfosIndexDivisor);
        if (doOpenStores)
        {
            instance.core.OpenDocStores(si);
        }
        instance.LoadDeletedDocs();
        instance.OpenNorms(instance.core.cfsDir, readBufferSize);
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            instance.DoClose();
        }
    }
    return instance;
}
/// <summary>Flush all pending docs to a new segment </summary>
internal int Flush(bool closeDocStore)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(AllThreadsIdle());
        System.Diagnostics.Debug.Assert(numDocsInRAM > 0);
        System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);
        System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0);
        System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);

        InitFlushState(false);

        docStoreOffset = numDocsInStore;

        if (infoStream != null)
            Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM);

        bool success = false;
        try
        {
            if (closeDocStore)
            {
                System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null);
                System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName.Equals(flushState.segmentName));
                CloseDocStore();
                flushState.numDocsInStore = 0;
            }

            System.Collections.Hashtable threads = new System.Collections.Hashtable();
            for (int i = 0; i < threadStates.Length; i++)
                threads[threadStates[i].consumer] = threadStates[i].consumer;
            consumer.Flush(threads, flushState);

            if (infoStream != null)
            {
                SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory);
                long newSegmentSize = si.SizeInBytes();
                System.String message = System.String.Format(nf, "  oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:%}",
                    new System.Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), (100.0 * newSegmentSize / numBytesUsed) });
                Message(message);
            }

            flushedDocCount += flushState.numDocs;

            DoAfterFlush();

            success = true;
        }
        finally
        {
            if (!success)
            {
                Abort();
            }
        }

        System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);

        return flushState.numDocs;
    }
}
internal static bool HasDeletions(SegmentInfo si)
{
    // Don't call ensureOpen() here (it could affect performance)
    return si.HasDeletions();
}
/// <summary> Test stored fields for a segment.</summary>
private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    var status = new Status.StoredFieldStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write("    test: stored fields.......");
        }

        // Scan stored fields for all documents
        for (int j = 0; j < info.docCount; ++j)
        {
            if (!reader.IsDeleted(j))
            {
                status.docCount++;
                Document doc = reader.Document(j);
                status.totFields += doc.GetFields().Count;
            }
        }

        // Validate docCount (the original message printed status.docCount twice;
        // the second value should be the reader's undeleted doc count)
        if (status.docCount != reader.NumDocs())
        {
            throw new System.SystemException("docCount=" + status.docCount + " but saw " + reader.NumDocs() + " undeleted docs");
        }

        Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float)status.totFields) / status.docCount) }));
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
internal static bool UsesCompoundFile(SegmentInfo si)
{
    return si.GetUseCompoundFile();
}

/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public static SegmentReader Get(SegmentInfo si)
{
    return Get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true);
}

internal static bool HasSeparateNorms(SegmentInfo si)
{
    return si.HasSeparateNorms();
}
/// <summary> Finds merges necessary to expunge all deletes from the
/// index. We simply merge adjacent segments that have
/// deletes, up to mergeFactor at a time.
/// </summary>
public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
{
    int numSegments = segmentInfos.Count;

    if (Verbose())
    {
        Message("findMergesToExpungeDeletes: " + numSegments + " segments");
    }

    MergeSpecification spec = new MergeSpecification();
    int firstSegmentWithDeletions = -1;
    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = segmentInfos.Info(i);
        int delCount = writer.NumDeletedDocs(info);
        if (delCount > 0)
        {
            if (Verbose())
            {
                Message("  segment " + info.name + " has deletions");
            }
            if (firstSegmentWithDeletions == -1)
            {
                firstSegmentWithDeletions = i;
            }
            else if (i - firstSegmentWithDeletions == mergeFactor)
            {
                // We've seen mergeFactor segments in a row with
                // deletions, so force a merge now:
                if (Verbose())
                {
                    Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                }
                spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
                firstSegmentWithDeletions = i;
            }
        }
        else if (firstSegmentWithDeletions != -1)
        {
            // End of a sequence of segments with deletions, so,
            // merge those past segments even if it's fewer than
            // mergeFactor segments
            if (Verbose())
            {
                Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
            }
            spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
            firstSegmentWithDeletions = -1;
        }
    }

    if (firstSegmentWithDeletions != -1)
    {
        if (Verbose())
        {
            Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
        }
        spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments)));
    }

    return spec;
}
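// The scan above is just run-length grouping: contiguous runs of segments with
// deletions are cut into merges of at most mergeFactor segments each. The same
// rule over plain flags, as a standalone sketch (hypothetical helper, end index
// exclusive; not part of the merge policy class):
using System.Collections.Generic;

static class ExpungeDemo
{
    public static List<(int Start, int End)> DeleteRuns(bool[] hasDeletions, int mergeFactor)
    {
        var runs = new List<(int, int)>();
        int first = -1;
        for (int i = 0; i < hasDeletions.Length; i++)
        {
            if (hasDeletions[i])
            {
                if (first == -1)
                {
                    first = i;
                }
                else if (i - first == mergeFactor)
                {
                    runs.Add((first, i)); // mergeFactor deleted segments in a row: cut here
                    first = i;
                }
            }
            else if (first != -1)
            {
                runs.Add((first, i)); // run ended: merge what we have, even if short
                first = -1;
            }
        }
        if (first != -1)
        {
            runs.Add((first, hasDeletions.Length));
        }
        return runs;
    }
}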
public override void SetUp()
{
    base.SetUp();
    DocHelper.SetupDoc(testDoc);
    info = DocHelper.WriteDoc(dir, testDoc);
}
/// <summary>Returns a <see cref="Status"/> instance detailing
/// the state of the index.
/// </summary>
/// <param name="onlySegments">list of specific segment names to check
///
/// <p/>As this method checks every byte in the specified
/// segments, on a large index it can take quite a long
/// time to run.
///
/// <p/><b>WARNING</b>: make sure
/// you only call this when the index is not opened by any
/// writer.
/// </param>
public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
{
    System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();
    Status result = new Status();
    result.dir = dir;
    try
    {
        sis.Read(dir);
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not read any segments file in directory");
        result.missingSegments = true;
        if (infoStream != null)
        {
            infoStream.WriteLine(t.StackTrace);
        }
        return result;
    }

    int numSegments = sis.Count;
    System.String segmentsFileName = sis.GetCurrentSegmentFileName();
    IndexInput input = null;
    try
    {
        input = dir.OpenInput(segmentsFileName);
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not open segments file in directory");
        if (infoStream != null)
        {
            infoStream.WriteLine(t.StackTrace);
        }
        result.cantOpenSegments = true;
        return result;
    }

    int format = 0;
    try
    {
        format = input.ReadInt();
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not read segment file version in directory");
        if (infoStream != null)
        {
            infoStream.WriteLine(t.StackTrace);
        }
        result.missingSegmentVersion = true;
        return result;
    }
    finally
    {
        if (input != null)
        {
            input.Close();
        }
    }

    System.String sFormat = "";
    bool skip = false;

    // Note: the original used a plain 'if' for the first test, which let the
    // final 'else' clobber sFormat for pre-2.1 indexes; it belongs in the chain.
    if (format == SegmentInfos.FORMAT)
    {
        sFormat = "FORMAT [Lucene Pre-2.1]";
    }
    else if (format == SegmentInfos.FORMAT_LOCKLESS)
    {
        sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
    }
    else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
    {
        sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    }
    else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
    {
        sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    }
    else
    {
        if (format == SegmentInfos.FORMAT_CHECKSUM)
        {
            sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
        }
        else if (format == SegmentInfos.FORMAT_DEL_COUNT)
        {
            sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
        }
        else if (format == SegmentInfos.FORMAT_HAS_PROX)
        {
            sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
        }
        else if (format == SegmentInfos.FORMAT_USER_DATA)
        {
            sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
        }
        else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
        {
            sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
        }
        else if (format < SegmentInfos.CURRENT_FORMAT)
        {
            sFormat = "int=" + format + " [newer version of Lucene than this tool]";
            skip = true;
        }
        else
        {
            sFormat = format + " [Lucene 1.3 or prior]";
        }
    }

    result.segmentsFileName = segmentsFileName;
    result.numSegments = numSegments;
    result.segmentFormat = sFormat;
    result.userData = sis.GetUserData();
    System.String userDataString;
    if (sis.GetUserData().Count > 0)
    {
        userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
    }
    else
    {
        userDataString = "";
    }

    Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

    if (onlySegments != null)
    {
        result.partial = true;
        if (infoStream != null)
        {
            infoStream.Write("\nChecking only these segments:");
        }
        System.Collections.IEnumerator it = onlySegments.GetEnumerator();
        while (it.MoveNext())
        {
            if (infoStream != null)
            {
                infoStream.Write(" " + it.Current);
            }
        }
        System.Collections.IEnumerator e = onlySegments.GetEnumerator();
        while (e.MoveNext())
        {
            result.segmentsChecked.Add(e.Current);
        }
        Msg(":");
    }

    if (skip)
    {
        Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        result.toolOutOfDate = true;
        return result;
    }

    result.newSegments = (SegmentInfos)sis.Clone();
    result.newSegments.Clear();

    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = sis.Info(i);
        if (onlySegments != null && !onlySegments.Contains(info.name))
        {
            continue;
        }
        Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
        result.segmentInfos.Add(segInfoStat);
        Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
        segInfoStat.name = info.name;
        segInfoStat.docCount = info.docCount;

        int toLoseDocCount = info.docCount;

        SegmentReader reader = null;

        try
        {
            Msg("    compound=" + info.GetUseCompoundFile());
            segInfoStat.compound = info.GetUseCompoundFile();
            Msg("    hasProx=" + info.GetHasProx());
            segInfoStat.hasProx = info.GetHasProx();
            Msg("    numFiles=" + info.Files().Count);
            segInfoStat.numFiles = info.Files().Count;
            Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
            segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);

            System.Collections.Generic.IDictionary<string, string> diagnostics = info.GetDiagnostics();
            segInfoStat.diagnostics = diagnostics;
            if (diagnostics.Count > 0)
            {
                Msg("    diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
            }

            int docStoreOffset = info.GetDocStoreOffset();
            if (docStoreOffset != -1)
            {
                Msg("    docStoreOffset=" + docStoreOffset);
                segInfoStat.docStoreOffset = docStoreOffset;
                Msg("    docStoreSegment=" + info.GetDocStoreSegment());
                segInfoStat.docStoreSegment = info.GetDocStoreSegment();
                Msg("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
            }

            System.String delFileName = info.GetDelFileName();
            if (delFileName == null)
            {
                Msg("    no deletions");
                segInfoStat.hasDeletions = false;
            }
            else
            {
                Msg("    has deletions [delFileName=" + delFileName + "]");
                segInfoStat.hasDeletions = true;
                segInfoStat.deletionsFileName = delFileName;
            }

            if (infoStream != null)
            {
                infoStream.Write("    test: open reader.........");
            }
            reader = SegmentReader.Get(info);

            segInfoStat.openReaderPassed = true;

            int numDocs = reader.NumDocs();
            toLoseDocCount = numDocs;
            if (reader.HasDeletions())
            {
                if (reader.deletedDocs.Count() != info.GetDelCount())
                {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                }
                if (reader.deletedDocs.Count() > reader.MaxDoc())
                {
                    throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                }
                if (info.docCount - numDocs != info.GetDelCount())
                {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                }
                segInfoStat.numDeleted = info.docCount - numDocs;
                Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
            }
            else
            {
                if (info.GetDelCount() != 0)
                {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                }
                Msg("OK");
            }
            if (reader.MaxDoc() != info.docCount)
            {
                throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
            }

            // Test getFieldNames()
            if (infoStream != null)
            {
                infoStream.Write("    test: fields..............");
            }
            System.Collections.Generic.ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
            Msg("OK [" + fieldNames.Count + " fields]");
            segInfoStat.numFields = fieldNames.Count;

            // Test Field Norms
            segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

            // Test the Term Index
            segInfoStat.termIndexStatus = TestTermIndex(info, reader);

            // Test Stored Fields
            segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

            // Test Term Vectors
            segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

            // Rethrow the first exception we encountered.
            // This will cause stats for failed segments to be incremented properly
            if (segInfoStat.fieldNormStatus.error != null)
            {
                throw new System.SystemException("Field Norm test failed");
            }
            else if (segInfoStat.termIndexStatus.error != null)
            {
                throw new System.SystemException("Term Index test failed");
            }
            else if (segInfoStat.storedFieldStatus.error != null)
            {
                throw new System.SystemException("Stored Field test failed");
            }
            else if (segInfoStat.termVectorStatus.error != null)
            {
                throw new System.SystemException("Term Vector test failed");
            }

            Msg("");
        }
        catch (System.Exception t)
        {
            Msg("FAILED");
            System.String comment;
            comment = "fixIndex() would remove reference to this segment";
            Msg("    WARNING: " + comment + "; full exception:");
            if (infoStream != null)
            {
                infoStream.WriteLine(t.StackTrace);
            }
            Msg("");
            result.totLoseDocCount += toLoseDocCount;
            result.numBadSegments++;
            continue;
        }
        finally
        {
            if (reader != null)
            {
                reader.Close();
            }
        }

        // Keeper
        result.newSegments.Add(info.Clone());
    }

    if (0 == result.numBadSegments)
    {
        result.clean = true;
        Msg("No problems were detected with this index.\n");
    }
    else
    {
        Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
    }

    return result;
}
public virtual void TestFixedPostings()
{
    const int NUM_TERMS = 100;
    TermData[] terms = new TermData[NUM_TERMS];
    for (int i = 0; i < NUM_TERMS; i++)
    {
        int[] docs = new int[] { i };
        string text = Convert.ToString(i);
        terms[i] = new TermData(this, text, docs, null);
    }

    FieldInfos.Builder builder = new FieldInfos.Builder();

    FieldData field = new FieldData(this, "field", builder, terms, true, false);
    FieldData[] fields = new FieldData[] { field };
    FieldInfos fieldInfos = builder.Finish();

    // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
    using (Directory dir = NewDirectory())
    {
        this.Write(fieldInfos, dir, fields, true);
        Codec codec = Codec.Default;
        SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

        // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
        using (FieldsProducer reader = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
        {
            IEnumerator<string> fieldsEnum = reader.GetEnumerator();
            fieldsEnum.MoveNext();
            string fieldName = fieldsEnum.Current;
            Assert.IsNotNull(fieldName);
            Terms terms2 = reader.GetTerms(fieldName);
            Assert.IsNotNull(terms2);

            TermsEnum termsEnum = terms2.GetEnumerator();

            DocsEnum docsEnum = null;
            for (int i = 0; i < NUM_TERMS; i++)
            {
                Assert.IsTrue(termsEnum.MoveNext());
                BytesRef term = termsEnum.Term;
                Assert.AreEqual(terms[i].text2, term.Utf8ToString());

                // Do this twice to stress test the codec's reuse, i.e.,
                // make sure it properly fully resets (rewinds) its
                // internal state:
                for (int iter = 0; iter < 2; iter++)
                {
                    docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
                    Assert.AreEqual(terms[i].docs[0], docsEnum.NextDoc());
                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                }
            }

            Assert.IsFalse(termsEnum.MoveNext());

            for (int i = 0; i < NUM_TERMS; i++)
            {
                Assert.AreEqual(termsEnum.SeekCeil(new BytesRef(terms[i].text2)), TermsEnum.SeekStatus.FOUND);
            }

            Assert.IsFalse(fieldsEnum.MoveNext());
        }
    }
}
internal static bool UsesCompoundFile(SegmentInfo si)
{
    return si.dir.FileExists(si.name + ".cfs");
}
/// <summary> Test the term index.</summary>
private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
{
    Status.TermIndexStatus status = new Status.TermIndexStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write("    test: terms, freq, prox...");
        }

        TermEnum termEnum = reader.Terms();
        TermPositions termPositions = reader.TermPositions();

        // Used only to count up # deleted docs for this term
        MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

        int maxDoc = reader.MaxDoc();

        while (termEnum.Next())
        {
            status.termCount++;
            Term term = termEnum.Term();
            int docFreq = termEnum.DocFreq();
            termPositions.Seek(term);
            int lastDoc = -1;
            int freq0 = 0;
            status.totFreq += docFreq;
            while (termPositions.Next())
            {
                freq0++;
                int doc = termPositions.Doc();
                int freq = termPositions.Freq();
                if (doc <= lastDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                }
                if (doc >= maxDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                }
                lastDoc = doc;
                if (freq <= 0)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                }

                int lastPos = -1;
                status.totPos += freq;
                for (int j = 0; j < freq; j++)
                {
                    int pos = termPositions.NextPosition();
                    if (pos < -1)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                    }
                    if (pos < lastPos)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                    }
                    // The original omitted this update, so the ordering check above
                    // always compared against -1 and could never fire:
                    lastPos = pos;
                }
            }

            // Now count how many deleted docs occurred in this term:
            int delCount;
            if (reader.HasDeletions())
            {
                myTermDocs.Seek(term);
                while (myTermDocs.Next())
                {
                }
                delCount = myTermDocs.delCount;
            }
            else
            {
                delCount = 0;
            }

            if (freq0 + delCount != docFreq)
            {
                throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
            }
        }

        Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
public static SegmentReader Get(SegmentInfo si)
{
    return Get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
}

/// <summary>Returns true if this single info is optimized (has no
/// pending norms or deletes, is in the same dir as the
/// writer, and matches the current compound file setting).
/// </summary>
private bool IsOptimized(SegmentInfo info)
{
    bool hasDeletions = writer.NumDeletedDocs(info) > 0;
    return !hasDeletions
        && !info.HasSeparateNorms()
        && info.dir == writer.GetDirectory()
        && info.GetUseCompoundFile() == useCompoundFile;
}

internal static SegmentReader Get(SegmentInfo si, int readBufferSize, bool doOpenStores, int termInfosIndexDivisor)
{
    return Get(false, si.dir, si, readBufferSize, doOpenStores, termInfosIndexDivisor);
}

public static SegmentReader Get(SegmentInfo si)
{
    return Get(si.dir, si, null, false, false);
}
internal virtual SegmentReader ReopenSegment(SegmentInfo si, bool doClone, bool openReadOnly)
{
    lock (this)
    {
        bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions())
            && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));

        bool normsUpToDate = true;
        bool[] fieldNormsChanged = new bool[core.fieldInfos.Size()];
        int fieldCount = core.fieldInfos.Size();
        for (int i = 0; i < fieldCount; i++)
        {
            if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i)))
            {
                normsUpToDate = false;
                fieldNormsChanged[i] = true;
            }
        }

        // If we're cloning we need to run through the reopenSegment logic;
        // also if both old and new readers aren't readonly, we clone to avoid sharing modifications
        if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly)
        {
            return this;
        }

        // When cloning, the incoming SegmentInfos should not have any changes in it:
        System.Diagnostics.Debug.Assert(!doClone || (normsUpToDate && deletionsUpToDate));

        // Clone the reader
        SegmentReader clone;
        try
        {
            if (openReadOnly)
                clone = (SegmentReader)System.Activator.CreateInstance(READONLY_IMPL);
            else
                clone = (SegmentReader)System.Activator.CreateInstance(IMPL);
        }
        catch (System.Exception e)
        {
            throw new System.SystemException("cannot load SegmentReader class: " + e, e);
        }

        bool success = false;
        try
        {
            core.IncRef();
            clone.core = core;
            clone.readOnly = openReadOnly;
            clone.si = si;
            clone.readBufferSize = readBufferSize;

            if (!openReadOnly && hasChanges)
            {
                // My pending changes transfer to the new reader
                clone.pendingDeleteCount = pendingDeleteCount;
                clone.deletedDocsDirty = deletedDocsDirty;
                clone.normsDirty = normsDirty;
                clone.hasChanges = hasChanges;
                hasChanges = false;
            }

            if (doClone)
            {
                if (deletedDocs != null)
                {
                    deletedDocsRef.IncRef();
                    clone.deletedDocs = deletedDocs;
                    clone.deletedDocsRef = deletedDocsRef;
                }
            }
            else
            {
                if (!deletionsUpToDate)
                {
                    // Load deleted docs
                    System.Diagnostics.Debug.Assert(clone.deletedDocs == null);
                    clone.LoadDeletedDocs();
                }
                else if (deletedDocs != null)
                {
                    deletedDocsRef.IncRef();
                    clone.deletedDocs = deletedDocs;
                    clone.deletedDocsRef = deletedDocsRef;
                }
            }

            clone.SetDisableFakeNorms(GetDisableFakeNorms());
            clone.norms = new System.Collections.Hashtable();

            // Clone norms
            for (int i = 0; i < fieldNormsChanged.Length; i++)
            {
                // Clone unchanged norms to the cloned reader
                if (doClone || !fieldNormsChanged[i])
                {
                    System.String curField = core.fieldInfos.FieldInfo(i).name;
                    Norm norm = (Norm)this.norms[curField];
                    if (norm != null)
                        clone.norms[curField] = norm.Clone();
                }
            }

            // If we are not cloning, then this will open anew
            // any norms that have changed:
            clone.OpenNorms(si.GetUseCompoundFile() ? core.GetCFSReader() : Directory(), readBufferSize);

            success = true;
        }
        finally
        {
            if (!success)
            {
                // An exception occurred during reopen; we have to decRef the norms
                // that we incRef'ed already and close singleNormStream and FieldsReader
                clone.DecRef();
            }
        }

        return clone;
    }
}
public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
{
    SegmentReader instance;
    try
    {
        instance = (SegmentReader)System.Activator.CreateInstance(IMPL);
    }
    catch (System.Exception e)
    {
        throw new System.SystemException("cannot load SegmentReader class: " + e, e);
    }
    instance.Init(dir, sis, closeDir, ownDir);
    instance.Initialize(si);
    return instance;
}
// Got the idea for this from George's note in TestSegmentReader.
// It seems that JUnit creates a new instance of the class for each test invocation,
// while NUnit does not (seems like a flaw in JUnit, to be honest).
// Forcing the re-init of the variables for each run solves the problem.
private void SetUpInternal()
{
    dir = new RAMDirectory();
    testDoc = new Lucene.Net.Documents.Document();
    info = null;
}
internal static bool HasDeletions(SegmentInfo si)
{
    return si.HasDeletions();
}
/// <summary> Test the term index.</summary>
private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
{
    var status = new Status.TermIndexStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write("    test: terms, freq, prox...");
        }

        TermEnum termEnum = reader.Terms();
        TermPositions termPositions = reader.TermPositions();

        // Used only to count up # deleted docs for this term
        var myTermDocs = new MySegmentTermDocs(reader);

        int maxDoc = reader.MaxDoc;

        while (termEnum.Next())
        {
            status.termCount++;
            Term term = termEnum.Term;
            int docFreq = termEnum.DocFreq();
            termPositions.Seek(term);
            int lastDoc = -1;
            int freq0 = 0;
            status.totFreq += docFreq;
            while (termPositions.Next())
            {
                freq0++;
                int doc = termPositions.Doc;
                int freq = termPositions.Freq;
                if (doc <= lastDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                }
                if (doc >= maxDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                }
                lastDoc = doc;
                if (freq <= 0)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                }

                int lastPos = -1;
                status.totPos += freq;
                for (int j = 0; j < freq; j++)
                {
                    int pos = termPositions.NextPosition();
                    if (pos < -1)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                    }
                    if (pos < lastPos)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                    }
                    lastPos = pos;
                }
            }

            // Now count how many deleted docs occurred in this term:
            int delCount;
            if (reader.HasDeletions)
            {
                myTermDocs.Seek(term);
                while (myTermDocs.Next())
                {
                }
                delCount = myTermDocs.delCount;
            }
            else
            {
                delCount = 0;
            }

            if (freq0 + delCount != docFreq)
            {
                throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
            }
        }

        Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
internal static bool HasSeparateNorms(SegmentInfo si)
{
    return si.HasSeparateNorms();
}
/// <summary> Test term vectors for a segment.</summary>
private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    var status = new Status.TermVectorStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write("    test: term vectors........");
        }

        for (int j = 0; j < info.docCount; ++j)
        {
            if (!reader.IsDeleted(j))
            {
                status.docCount++;
                ITermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                if (tfv != null)
                {
                    status.totVectors += tfv.Length;
                }
            }
        }

        Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float)status.totVectors) / status.docCount) }));
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
internal void ReWrite(SegmentInfo si)
{
    // NOTE: norms are re-written in the regular directory, not cfs
    System.String oldFileName = si.GetNormFileName(this.number);
    if (oldFileName != null && !oldFileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
    {
        // Mark this file for deletion. Note that we don't
        // actually try to delete it until the new segments file is
        // successfully written:
        Enclosing_Instance.deleter.AddPendingFile(oldFileName);
    }

    si.AdvanceNormGen(this.number);
    IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(si.GetNormFileName(this.number));
    try
    {
        out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc());
    }
    finally
    {
        out_Renamed.Close();
    }
    this.dirty = false;
}
protected internal override long Size(SegmentInfo info, IState state)
{
    return SizeDocs(info, state);
}

internal virtual void SetSegmentInfo(SegmentInfo info)
{
    si = info;
}

public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
{
    return Get(si.dir, si, sis, closeDir, true);
}

public FlushFailedEvent(SegmentInfo info)
{
    this.info = info;
}
private void Initialize(SegmentInfo si)
{
    segment = si.name;
    this.si = si;

    bool success = false;

    try
    {
        // Use the compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile())
        {
            cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
            cfsDir = cfsReader;
        }

        // Read the field infos (from the compound dir if one is in use)
        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
        fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

        // Verify two sources of "maxDoc" agree:
        if (fieldsReader.Size() != si.docCount)
        {
            throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
        }

        tis = new TermInfosReader(cfsDir, segment, fieldInfos);

        // NOTE: the bitvector is stored using the regular directory, not cfs
        if (HasDeletions(si))
        {
            deletedDocs = new BitVector(Directory(), si.GetDelFileName());

            // Verify # deletes does not exceed maxDoc for this segment:
            if (deletedDocs.Count() > MaxDoc())
            {
                throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
            }
        }

        // Make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq");
        proxStream = cfsDir.OpenInput(segment + ".prx");
        OpenNorms(cfsDir);

        if (fieldInfos.HasVectors())
        {
            // Open term vector files only as needed
            termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
        }

        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            DoClose();
        }
    }
}
/// <summary> Returns true if a newly flushed (not from merge)
/// segment should use the compound file format.
/// </summary>
public abstract bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment);

internal static bool UsesCompoundFile(SegmentInfo si)
{
    return si.GetUseCompoundFile();
}

// Javadoc inherited
public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info)
{
    return useCompoundFile;
}

protected internal abstract long Size(SegmentInfo info);
/// <summary>Returns the merges necessary to optimize the index.
/// This merge policy defines "optimized" to mean only one
/// segment in the index, where that segment has no
/// deletions pending nor separate norms, and it is in
/// compound file format if the current useCompoundFile
/// setting is true. This method returns multiple merges
/// (mergeFactor at a time) so the <see cref="MergeScheduler"/>
/// in use may make use of concurrency.
/// </summary>
public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
{
    MergeSpecification spec;

    System.Diagnostics.Debug.Assert(maxNumSegments > 0);

    if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
    {
        // Find the newest (rightmost) segment that needs to
        // be optimized (other segments may have been flushed
        // since optimize started):
        int last = infos.Count;
        while (last > 0)
        {
            SegmentInfo info = infos.Info(--last);
            if (segmentsToOptimize.Contains(info))
            {
                last++;
                break;
            }
        }

        if (last > 0)
        {
            spec = new MergeSpecification();

            // First, enroll all "full" merges (size
            // mergeFactor) to potentially be run concurrently:
            while (last - maxNumSegments + 1 >= mergeFactor)
            {
                spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last)));
                last -= mergeFactor;
            }

            // Only if there are no full merges pending do we
            // add a final partial (< mergeFactor segments) merge:
            if (0 == spec.merges.Count)
            {
                if (maxNumSegments == 1)
                {
                    // Since we must optimize down to 1 segment, the choice is simple:
                    if (last > 1 || !IsOptimized(infos.Info(0)))
                    {
                        spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
                    }
                }
                else if (last > maxNumSegments)
                {
                    // Take care to pick a partial merge that is
                    // least cost, but does not make the index too
                    // lopsided. If we always just picked the
                    // partial tail then we could produce a highly
                    // lopsided index over time:

                    // We must merge this many segments to leave
                    // maxNumSegments in the index (from when
                    // optimize was first kicked off):
                    int finalMergeSize = last - maxNumSegments + 1;

                    // Consider all possible starting points:
                    long bestSize = 0;
                    int bestStart = 0;

                    for (int i = 0; i < last - finalMergeSize + 1; i++)
                    {
                        long sumSize = 0;
                        for (int j = 0; j < finalMergeSize; j++)
                        {
                            sumSize += Size(infos.Info(j + i));
                        }
                        if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
                        {
                            bestStart = i;
                            bestSize = sumSize;
                        }
                    }

                    spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
                }
            }
        }
        else
        {
            spec = null;
        }
    }
    else
    {
        spec = null;
    }

    return spec;
}
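// The inner scan above slides a window of finalMergeSize segments across the
// candidates and keeps the cheapest one whose total is also less than twice the
// size of the segment on its left, so the final partial merge stays balanced.
// The same selection over plain sizes, as a standalone sketch (hypothetical
// helper, not part of the policy class):
static int BestMergeStart(long[] sizes, int windowSize)
{
    long bestSize = 0;
    int bestStart = 0;
    for (int i = 0; i + windowSize <= sizes.Length; i++)
    {
        long sumSize = 0;
        for (int j = 0; j < windowSize; j++)
        {
            sumSize += sizes[i + j];
        }
        // Accept a cheaper window only if it is also less than twice the size of
        // the segment immediately to its left, to avoid lopsiding the index.
        if (i == 0 || (sumSize < 2 * sizes[i - 1] && sumSize < bestSize))
        {
            bestStart = i;
            bestSize = sumSize;
        }
    }
    return bestStart;
}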