/// <summary>
/// Creates an enumerator that visits every document in <paramref name="parent"/>.
/// Snapshots the parent's deleted-docs bit vector under the parent's lock so a
/// consistent reference is observed.
/// </summary>
protected internal AllTermDocs(SegmentReader parent)
    : base(parent.MaxDoc)
{
    // Synchronize on the reader: deletedDocs may be swapped by other threads.
    lock (parent)
    {
        this.deletedDocs = parent.deletedDocs;
    }
}
/// <summary>
/// Opens a term-docs enumerator over <paramref name="parent"/>'s postings.
/// Clones the shared freq stream so this instance can seek independently,
/// snapshots the deletion bit vector, and copies the skip-list settings.
/// </summary>
public /*protected internal*/ SegmentTermDocs(SegmentReader parent)
{
    this.parent = parent;

    // Each enumerator needs its own stream position, so clone the shared stream.
    this.freqStream = (IndexInput) parent.core.freqStream.Clone();

    // Take the parent's lock so we observe a consistent deletedDocs reference.
    lock (parent)
    {
        this.deletedDocs = parent.deletedDocs;
    }

    this.skipInterval = parent.core.GetTermsReader().SkipInterval;
    this.maxSkipLevels = parent.core.GetTermsReader().MaxSkipLevels;
}
/// <summary> Test the term index.</summary>
/// <param name="info">Metadata for the segment being checked (unused here; kept for symmetry with the other checks).</param>
/// <param name="reader">An open reader over that segment.</param>
/// <returns>
/// A <c>Status.TermIndexStatus</c> with term/freq/position tallies; on failure
/// the exception is captured in <c>status.error</c> rather than propagated.
/// </returns>
private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
{
    var status = new Status.TermIndexStatus();
    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: terms, freq, prox...");
        }

        TermEnum termEnum = reader.Terms();
        TermPositions termPositions = reader.TermPositions();

        // Used only to count up # deleted docs for this term
        var myTermDocs = new MySegmentTermDocs(reader);

        int maxDoc = reader.MaxDoc;
        while (termEnum.Next())
        {
            status.termCount++;
            Term term = termEnum.Term;
            int docFreq = termEnum.DocFreq();
            termPositions.Seek(term);
            int lastDoc = -1;
            int freq0 = 0; // number of live docs actually seen for this term
            status.totFreq += docFreq;
            while (termPositions.Next())
            {
                freq0++;
                int doc = termPositions.Doc;
                int freq = termPositions.Freq;
                // Doc IDs within a posting list must be strictly increasing
                // and within [0, maxDoc).
                if (doc <= lastDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                }
                if (doc >= maxDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                }
                lastDoc = doc;
                if (freq <= 0)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                }
                int lastPos = -1;
                status.totPos += freq;
                for (int j = 0; j < freq; j++)
                {
                    int pos = termPositions.NextPosition();
                    // NOTE(review): a position of exactly -1 is tolerated here
                    // (only pos < -1 fails) — presumably for fields with omitted
                    // positions; confirm before tightening.
                    if (pos < -1)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                    }
                    // Positions within a document must be non-decreasing.
                    if (pos < lastPos)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                    }
                    lastPos = pos;
                }
            }

            // Now count how many deleted docs occurred in
            // this term:
            int delCount;
            if (reader.HasDeletions)
            {
                myTermDocs.Seek(term);
                // Exhaust the enum purely for its side effect on delCount.
                while (myTermDocs.Next())
                {
                }
                delCount = myTermDocs.delCount;
            }
            else
            {
                delCount = 0;
            }

            // Live postings plus deleted docs must account for the full docFreq.
            if (freq0 + delCount != docFreq)
            {
                throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
            }
        }

        Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }
    return status;
}
/// <summary> Test field norms.</summary>
/// <param name="fieldNames">Names of the fields to check.</param>
/// <param name="reader">An open reader over the segment.</param>
/// <returns>
/// A <c>Status.FieldNormStatus</c> counting the fields whose norms were read;
/// a failure is recorded in <c>status.error</c> instead of being thrown.
/// </returns>
private Status.FieldNormStatus TestFieldNorms(IEnumerable<string> fieldNames, SegmentReader reader)
{
    var status = new Status.FieldNormStatus();
    try
    {
        // Test Field Norms
        if (infoStream != null)
        {
            infoStream.Write(" test: field norms.........");
        }

        // Scratch buffer large enough for one norm byte per document.
        var norms = new byte[reader.MaxDoc];
        foreach (string field in fieldNames)
        {
            if (!reader.HasNorms(field))
            {
                continue; // nothing to read for this field
            }
            reader.Norms(field, norms, 0);
            ++status.totFields;
        }

        Msg("OK [" + status.totFields + " fields]");
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }
    return status;
}
// Simple pass-through to SegmentTermDocs; the subclass exists only to
// expose deleted-doc counting to the index checker.
internal MySegmentTermDocs(SegmentReader p)
    : base(p)
{
}
/// <summary>Carefully merges deletes for the segments we just
/// merged. This is tricky because, although merging will
/// clear all deletes (compacts the documents), new
/// deletes may have been flushed to the segments since
/// the merge was started. This method "carries over"
/// such new deletes onto the newly merged segment, and
/// saves the resulting deletes file (incrementing the
/// delete generation for merge.info). If no deletes were
/// flushed, no new deletes file is saved.
/// </summary>
private void CommitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergeReader)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startCommitMergeDeletes"));

        SegmentInfos sourceSegments = merge.segments;

        if (infoStream != null)
            Message("commitMergeDeletes " + merge.SegString(directory));

        // Carefully merge deletes that occurred after we
        // started merging:
        int docUpto = 0;  // next docID in the merged segment
        int delCount = 0; // total deletes carried over onto mergeReader
        for (int i = 0; i < sourceSegments.Count; i++)
        {
            SegmentInfo info = sourceSegments.Info(i);
            int docCount = info.docCount;
            SegmentReader previousReader = merge.readersClone[i]; // state when the merge started
            SegmentReader currentReader = merge.readers[i];       // state now
            if (previousReader.HasDeletions)
            {
                // There were deletes on this segment when the merge
                // started. The merge has collapsed away those
                // deletes, but, if new deletes were flushed since
                // the merge started, we must now carefully keep any
                // newly flushed deletes but mapping them to the new
                // docIDs.
                if (currentReader.NumDeletedDocs > previousReader.NumDeletedDocs)
                {
                    // This means this segment has had new deletes
                    // committed since we started the merge, so we
                    // must merge them:
                    for (int j = 0; j < docCount; j++)
                    {
                        if (previousReader.IsDeleted(j))
                        {
                            // Deletes only accumulate: anything deleted at
                            // merge start must still be deleted now.
                            System.Diagnostics.Debug.Assert(currentReader.IsDeleted(j));
                        }
                        else
                        {
                            if (currentReader.IsDeleted(j))
                            {
                                // Newly deleted since the merge started: delete the
                                // corresponding (remapped) doc in the merged segment.
                                mergeReader.DoDelete(docUpto);
                                delCount++;
                            }
                            // Only docs live at merge start exist in the merged
                            // segment, so docUpto advances only for those.
                            docUpto++;
                        }
                    }
                }
                else
                {
                    // No new deletes were flushed; skip past this segment's
                    // surviving docs in one step.
                    docUpto += docCount - previousReader.NumDeletedDocs;
                }
            }
            else if (currentReader.HasDeletions)
            {
                // This segment had no deletes before but now it
                // does:
                for (int j = 0; j < docCount; j++)
                {
                    if (currentReader.IsDeleted(j))
                    {
                        mergeReader.DoDelete(docUpto);
                        delCount++;
                    }
                    docUpto++;
                }
            }
            // No deletes before or after
            else
                docUpto += info.docCount;
        }

        System.Diagnostics.Debug.Assert(mergeReader.NumDeletedDocs == delCount);

        // Only mark the merged reader dirty when deletes were actually carried over.
        mergeReader.hasChanges = delCount > 0;
    }
}
/// <summary>
/// Release the segment reader: decrements its reference count and closes it
/// once no references remain. Equivalent to calling <c>Release(sr, false)</c>.
/// </summary>
/// <param name="sr">The reader to release.</param>
/// <throws> IOException </throws>
public virtual void Release(SegmentReader sr)
{
    lock (this)
    {
        Release(sr, false);
    }
}
/// <summary>
/// Creates a term-positions enumerator over <paramref name="p"/>'s postings.
/// </summary>
internal SegmentTermPositions(SegmentReader p)
    : base(p)
{
    // The proxStream will be cloned lazily when NextPosition()
    // is called for the first time.
    this.proxStream = null;
}
/// <summary>
/// Merges stored fields from all readers into this writer. When the source
/// reader is a <c>CompressingStoredFieldsReader</c> with an identical format
/// (version, compression mode, chunk size), whole compressed chunks are
/// bulk-copied without decompressing; otherwise documents are re-added one
/// at a time.
/// </summary>
/// <param name="mergeState">Carries the readers, live docs, field infos, and abort checker.</param>
/// <returns>The number of documents written to the merged segment.</returns>
/// <exception cref="CorruptIndexException">If a decompressed chunk's size does not match its recorded lengths.</exception>
public override int Merge(MergeState mergeState)
{
    int docCount = 0;
    int idx = 0;

    foreach (AtomicReader reader in mergeState.Readers)
    {
        SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
        CompressingStoredFieldsReader matchingFieldsReader = null;
        if (matchingSegmentReader != null)
        {
            StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader;
            // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
            if (fieldsReader != null && fieldsReader is CompressingStoredFieldsReader)
            {
                matchingFieldsReader = (CompressingStoredFieldsReader)fieldsReader;
            }
        }

        int maxDoc = reader.MaxDoc;
        IBits liveDocs = reader.LiveDocs;

        if (matchingFieldsReader == null
            || matchingFieldsReader.Version != VERSION_CURRENT // reader version is not the same as the writer version
            || matchingFieldsReader.CompressionMode != compressionMode
            || matchingFieldsReader.ChunkSize != chunkSize) // the way data is decompressed depends on the chunk size
        {
            // naive merge...
            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
            {
                Document doc = reader.Document(i);
                AddDocument(doc, mergeState.FieldInfos);
                ++docCount;
                mergeState.CheckAbort.Work(300);
            }
        }
        else
        {
            int docID = NextLiveDoc(0, liveDocs, maxDoc);
            if (docID < maxDoc)
            {
                // not all docs were deleted
                CompressingStoredFieldsReader.ChunkIterator it = matchingFieldsReader.GetChunkIterator(docID);
                int[] startOffsets = new int[0];
                do
                {
                    // go to the next chunk that contains docID
                    it.Next(docID);

                    // transform lengths into offsets
                    if (startOffsets.Length < it.chunkDocs)
                    {
                        startOffsets = new int[ArrayUtil.Oversize(it.chunkDocs, 4)];
                    }
                    for (int i = 1; i < it.chunkDocs; ++i)
                    {
                        startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
                    }

                    if (numBufferedDocs == 0 // starting a new chunk
                        && startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough
                        && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough
                        && NextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) // no deletion in the chunk
                    {
                        Debug.Assert(docID == it.docBase);

                        // no need to decompress, just copy data
                        indexWriter.WriteIndex(it.chunkDocs, fieldsStream.GetFilePointer());
                        WriteHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths);
                        it.CopyCompressedData(fieldsStream);
                        this.docBase += it.chunkDocs;
                        docID = NextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc);
                        docCount += it.chunkDocs;
                        mergeState.CheckAbort.Work(300 * it.chunkDocs);
                    }
                    else
                    {
                        // decompress
                        it.Decompress();
                        if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.Length)
                        {
                            // BUGFIX: parenthesize the sum — the original message
                            // string-concatenated the two ints, printing e.g. "10"
                            // and "20" as "1020" instead of the expected size 30.
                            throw new CorruptIndexException("Corrupted: expected chunk size=" + (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1]) + ", got " + it.bytes.Length);
                        }
                        // copy non-deleted docs
                        for (; docID < it.docBase + it.chunkDocs; docID = NextLiveDoc(docID + 1, liveDocs, maxDoc))
                        {
                            int diff = docID - it.docBase;
                            StartDocument(it.numStoredFields[diff]);
                            bufferedDocs.WriteBytes(it.bytes.Bytes, it.bytes.Offset + startOffsets[diff], it.lengths[diff]);
                            FinishDocument();
                            ++docCount;
                            mergeState.CheckAbort.Work(300);
                        }
                    }
                } while (docID < maxDoc);

                it.CheckIntegrity();
            }
        }
    }
    Finish(mergeState.FieldInfos, docCount);
    return docCount;
}
/// <summary>This constructor is only used for <see cref="Reopen()" />.
/// Reuses any old SegmentReaders that still match their segment (incRef'ing
/// shared ones), opens fresh readers for new/changed segments with rollback
/// on failure, then tries to carry unchanged norms over from the old cache.
/// </summary>
internal DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, IEnumerable<KeyValuePair<string, byte[]>> oldNormsCache, bool readOnly, bool doClone, int termInfosIndexDivisor)
{
    this.internalDirectory = directory;
    this.readOnly = readOnly;
    this.segmentInfos = infos;
    this.termInfosIndexDivisor = termInfosIndexDivisor;
    if (!readOnly)
    {
        // We assume that this segments_N was previously
        // properly sync'd:
        synced.UnionWith(infos.Files(directory, true));
    }

    // we put the old SegmentReaders in a map, that allows us
    // to lookup a reader using its segment name
    IDictionary<string, int> segmentReaders = new HashMap<string, int>();

    if (oldReaders != null)
    {
        // create a Map SegmentName->SegmentReader
        for (int i = 0; i < oldReaders.Length; i++)
        {
            segmentReaders[oldReaders[i].SegmentName] = i;
        }
    }

    var newReaders = new SegmentReader[infos.Count];

    // remember which readers are shared between the old and the re-opened
    // DirectoryReader - we have to incRef those readers
    var readerShared = new bool[infos.Count];

    for (int i = infos.Count - 1; i >= 0; i--)
    {
        // find SegmentReader for this segment
        if (!segmentReaders.ContainsKey(infos.Info(i).name))
        {
            // this is a new segment, no old SegmentReader can be reused
            newReaders[i] = null;
        }
        else
        {
            // there is an old reader for this segment - we'll try to reopen it
            newReaders[i] = oldReaders[segmentReaders[infos.Info(i).name]];
        }

        bool success = false;
        try
        {
            SegmentReader newReader;
            if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].SegmentInfo.GetUseCompoundFile())
            {
                // We should never see a totally new segment during cloning
                System.Diagnostics.Debug.Assert(!doClone);

                // this is a new reader; in case we hit an exception we can close it safely
                newReader = SegmentReader.Get(readOnly, infos.Info(i), termInfosIndexDivisor);
            }
            else
            {
                newReader = newReaders[i].ReopenSegment(infos.Info(i), doClone, readOnly);
            }
            if (newReader == newReaders[i])
            {
                // this reader will be shared between the old and the new one,
                // so we must incRef it
                readerShared[i] = true;
                newReader.IncRef();
            }
            else
            {
                readerShared[i] = false;
                newReaders[i] = newReader;
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Roll back: close or decRef every reader already (re)opened,
                // walking forward from the segment after the one that failed
                // (this outer loop runs in reverse, so those are the opened ones).
                for (i++; i < infos.Count; i++)
                {
                    if (newReaders[i] != null)
                    {
                        try
                        {
                            if (!readerShared[i])
                            {
                                // this is a new subReader that is not used by the old one,
                                // we can close it
                                newReaders[i].Close();
                            }
                            else
                            {
                                // this subReader is also used by the old reader, so instead
                                // closing we must decRef it
                                newReaders[i].DecRef();
                            }
                        }
                        catch (System.IO.IOException)
                        {
                            // keep going - we want to clean up as much as possible
                        }
                    }
                }
            }
        }
    }

    // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
    Initialize(newReaders);

    // try to copy unchanged norms from the old normsCache to the new one
    if (oldNormsCache != null)
    {
        foreach (var entry in oldNormsCache)
        {
            String field = entry.Key;
            if (!HasNorms(field))
            {
                continue;
            }

            byte[] oldBytes = entry.Value;

            var bytes = new byte[MaxDoc];

            for (int i = 0; i < subReaders.Length; i++)
            {
                // NOTE(review): the indexer is read before ContainsKey is
                // checked; with this HashMap a missing name yields default(int)
                // == 0 rather than throwing — confirm that is intended.
                int oldReaderIndex = segmentReaders[subReaders[i].SegmentName];

                // this SegmentReader was not re-opened, we can copy all of its norms
                if (segmentReaders.ContainsKey(subReaders[i].SegmentName) && (oldReaders[oldReaderIndex] == subReaders[i] || oldReaders[oldReaderIndex].norms[field] == subReaders[i].norms[field]))
                {
                    // we don't have to synchronize here: either this constructor is called from a SegmentReader,
                    // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
                    // which is synchronized
                    Array.Copy(oldBytes, oldStarts[oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]);
                }
                else
                {
                    subReaders[i].Norms(field, bytes, starts[i]);
                }
            }

            normsCache[field] = bytes; // update cache
        }
    }
}
/// <summary>
/// Wires up the given sub-readers: records them, builds the cumulative
/// doc-base offsets in <c>starts</c>, accumulates <c>maxDoc</c>, and notes
/// whether any sub-reader carries deletions. For writable readers, also
/// records the current index version.
/// </summary>
private void Initialize(SegmentReader[] subReaders)
{
    this.subReaders = subReaders;
    starts = new int[subReaders.Length + 1]; // build starts array

    for (int idx = 0; idx < subReaders.Length; idx++)
    {
        SegmentReader sub = subReaders[idx];

        // Each sub-reader's doc IDs begin where the docs counted so far end.
        starts[idx] = maxDoc;
        maxDoc += sub.MaxDoc; // compute maxDocs

        if (sub.HasDeletions)
        {
            hasDeletions = true;
        }
    }
    starts[subReaders.Length] = maxDoc;

    if (!readOnly)
    {
        maxIndexVersion = SegmentInfos.ReadCurrentVersion(internalDirectory);
    }
}
// Used by near real-time search
/// <summary>
/// Builds a read-only reader directly from an IndexWriter's in-flight
/// SegmentInfos, pulling pooled read-only clones for segments that live in
/// the writer's own directory and dropping any foreign-directory segments.
/// </summary>
internal DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor)
{
    this.internalDirectory = writer.Directory;
    this.readOnly = true;
    segmentInfos = infos;
    // Snapshot the infos so later changes by the writer are detectable.
    segmentInfosStart = (SegmentInfos) infos.Clone();
    this.termInfosIndexDivisor = termInfosIndexDivisor;
    if (!readOnly)
    {
        // We assume that this segments_N was previously
        // properly sync'd:
        // NOTE(review): readOnly is hard-wired to true just above, so this
        // branch appears unreachable — kept for parity with the other ctors.
        synced.UnionWith(infos.Files(internalDirectory, true));
    }

    // IndexWriter synchronizes externally before calling
    // us, which ensures infos will not change; so there's
    // no need to process segments in reverse order
    int numSegments = infos.Count;
    var readers = new SegmentReader[numSegments];
    Directory dir = writer.Directory;
    int upto = 0;
    for (int i = 0; i < numSegments; i++)
    {
        bool success = false;
        try
        {
            SegmentInfo info = infos.Info(i);
            if (info.dir == dir)
            {
                // Only segments in the writer's own directory are pooled;
                // foreign-directory segments are skipped (trimmed below).
                readers[upto++] = writer.readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor);
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Close all readers we had opened:
                for (upto--; upto >= 0; upto--)
                {
                    try
                    {
                        readers[upto].Close();
                    }
                    catch (System.Exception)
                    {
                        // keep going - we want to clean up as much as possible
                    }
                }
            }
        }
    }

    this.writer = writer;

    if (upto < readers.Length)
    {
        // This means some segments were in a foreign Directory
        var newReaders = new SegmentReader[upto];
        Array.Copy(readers, 0, newReaders, 0, upto);
        readers = newReaders;
    }

    Initialize(readers);
}
/// <summary>Construct reading the named set of readers: opens one
/// SegmentReader per segment in <paramref name="sis"/>, closing any
/// already-opened readers if one fails to open. </summary>
internal DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
{
    internalDirectory = directory;
    this.readOnly = readOnly;
    this.segmentInfos = sis;
    this.deletionPolicy = deletionPolicy;
    this.termInfosIndexDivisor = termInfosIndexDivisor;

    if (!readOnly)
    {
        // We assume that this segments_N was previously
        // properly sync'd:
        synced.UnionWith(sis.Files(directory, true));
    }

    // To reduce the chance of hitting FileNotFound
    // (and having to retry), we open segments in
    // reverse because IndexWriter merges & deletes
    // the newest segments first.
    var readers = new SegmentReader[sis.Count];
    for (int i = sis.Count - 1; i >= 0; i--)
    {
        bool success = false;
        try
        {
            readers[i] = SegmentReader.Get(readOnly, sis.Info(i), termInfosIndexDivisor);
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Close all readers we had opened:
                // (i++ skips past the reader that failed; since the outer loop
                // runs in reverse, the higher-indexed readers are the opened ones)
                for (i++; i < sis.Count; i++)
                {
                    try
                    {
                        readers[i].Close();
                    }
                    catch (System.Exception)
                    {
                        // keep going - we want to clean up as much as possible
                    }
                }
            }
        }
    }

    Initialize(readers);
}
/// <summary> Merges field infos and stored field values from all source
/// readers into the target segment, bulk-copying raw docs where the source
/// format allows it. </summary>
/// <returns> The number of documents in all of the readers
/// </returns>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
private int MergeFields()
{
    if (!mergeDocStores)
    {
        // When we are not merging by doc stores, that means
        // all segments were written as part of a single
        // autoCommit=false IndexWriter session, so their field
        // name -> number mapping are the same. So, we start
        // with the fieldInfos of the last segment in this
        // case, to keep that numbering.
        SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
        fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone();
    }
    else
    {
        fieldInfos = new FieldInfos(); // merge field names
    }

    for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
    {
        IndexReader reader = (IndexReader)iter.Current;
        if (reader is SegmentReader)
        {
            // Fast path: copy each FieldInfo's flags straight from the segment.
            SegmentReader segmentReader = (SegmentReader)reader;
            FieldInfos readerFieldInfos = segmentReader.FieldInfos();
            int numReaderFieldInfos = readerFieldInfos.Size();
            for (int j = 0; j < numReaderFieldInfos; j++)
            {
                FieldInfo fi = readerFieldInfos.FieldInfo(j);
                fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
            }
        }
        else
        {
            // Generic reader: reconstruct field flags from the field-option views.
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
            fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
        }
    }
    fieldInfos.Write(directory, segment + ".fnm");

    int docCount = 0;

    SetMatchingSegmentReaders();

    if (mergeDocStores)
    {
        // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
        // in merge mode, we use this FieldSelector
        FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

        // merge field values
        FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

        try
        {
            int idx = 0;
            for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
            {
                IndexReader reader = (IndexReader)iter.Current;
                SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
                FieldsReader matchingFieldsReader = null;
                if (matchingSegmentReader != null)
                {
                    // Raw doc copying is only possible when the source reader
                    // can hand us its stored bytes unparsed.
                    FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                    if (fieldsReader != null && fieldsReader.CanReadRawDocs())
                    {
                        matchingFieldsReader = fieldsReader;
                    }
                }
                if (reader.HasDeletions())
                {
                    docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
                }
                else
                {
                    docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
                }
            }
        }
        finally
        {
            fieldsWriter.Close();
        }

        System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
        long fdxFileLength = directory.FileLength(fileName);

        // Sanity check: the fdx index holds a 4-byte header plus one 8-byte
        // pointer per doc, so its length is fully determined by docCount.
        if (4 + ((long)docCount) * 8 != fdxFileLength)
        {
            // This is most likely a bug in Sun JRE 1.6.0_04/_05;
            // we detect that the bug has struck, here, and
            // throw an exception to prevent the corruption from
            // entering the index. See LUCENE-1282 for
            // details.
            throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
        }
    }
    // If we are skipping the doc stores, that means there
    // are no deletions in any of these segments, so we
    // just sum numDocs() of each segment to get total docCount
    else
    {
        for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
        {
            docCount += ((IndexReader)iter.Current).NumDocs();
        }
    }

    return (docCount);
}
/// <summary> Test stored fields for a segment.</summary>
/// <param name="info">Metadata for the segment being checked (supplies docCount).</param>
/// <param name="reader">An open reader over that segment.</param>
/// <param name="format">Culture info used when formatting the summary message.</param>
/// <returns>
/// A <c>Status.StoredFieldStatus</c> with doc/field tallies; any failure is
/// captured in <c>status.error</c> instead of being thrown to the caller.
/// </returns>
private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    var status = new Status.StoredFieldStatus();
    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: stored fields.......");
        }

        // Scan stored fields for all documents
        for (int j = 0; j < info.docCount; ++j)
        {
            if (!reader.IsDeleted(j))
            {
                status.docCount++;
                Document.Document doc = reader.Document(j);
                status.totFields += doc.GetFields().Count;
            }
        }

        // Validate docCount
        if (status.docCount != reader.NumDocs())
        {
            // BUGFIX: the original message printed status.docCount on both
            // sides ("docCount=X but saw X"), hiding the mismatch; report the
            // reader's NumDocs() as the expected count instead.
            throw new System.SystemException("docCount=" + reader.NumDocs() + " but saw " + status.docCount + " undeleted docs");
        }

        Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }
    return status;
}
/// <summary>
/// Merges term vectors from all readers: bulk-copies compressed chunks when
/// the source reader uses an identical format (version, compression mode,
/// chunk size, packed-ints version) and the chunk holds no deleted docs;
/// otherwise re-adds vectors document by document.
/// </summary>
public override int Merge(MergeState mergeState)
{
    int docCount = 0;
    int idx = 0;

    foreach (AtomicReader reader in mergeState.Readers)
    {
        SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
        CompressingTermVectorsReader matchingVectorsReader = null;
        if (matchingSegmentReader != null)
        {
            TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;
            // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
            if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader compressingTermVectorsReader)
            {
                matchingVectorsReader = compressingTermVectorsReader;
            }
        }

        int maxDoc = reader.MaxDoc;
        IBits liveDocs = reader.LiveDocs;

        if (matchingVectorsReader == null || matchingVectorsReader.Version != VERSION_CURRENT || matchingVectorsReader.CompressionMode != compressionMode || matchingVectorsReader.ChunkSize != chunkSize || matchingVectorsReader.PackedInt32sVersion != PackedInt32s.VERSION_CURRENT)
        {
            // naive merge...
            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
            {
                Fields vectors = reader.GetTermVectors(i);
                AddAllDocVectors(vectors, mergeState);
                ++docCount;
                mergeState.CheckAbort.Work(300);
            }
        }
        else
        {
            CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index;
            IndexInput vectorsStreamOrig = matchingVectorsReader.VectorsStream;
            vectorsStreamOrig.Seek(0);
            // Read through a checksumming clone so the trailing footer check
            // can verify integrity without a second full pass over the file.
            ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone());

            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc;)
            {
                // We make sure to move the checksum input in any case, otherwise the final
                // integrity check might need to read the whole file a second time
                long startPointer = index.GetStartPointer(i);
                if (startPointer > vectorsStream.GetFilePointer())
                {
                    vectorsStream.Seek(startPointer);
                }
                if ((pendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk
                {
                    int docBase = vectorsStream.ReadVInt32();
                    int chunkDocs = vectorsStream.ReadVInt32();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc);
                    }
                    if (docBase + chunkDocs < matchingSegmentReader.MaxDoc && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs)
                    {
                        // Chunk contains no deleted docs (and is not the trailing
                        // chunk): copy its compressed bytes without decompressing.
                        long chunkEnd = index.GetStartPointer(docBase + chunkDocs);
                        long chunkLength = chunkEnd - vectorsStream.GetFilePointer();
                        indexWriter.WriteIndex(chunkDocs, this.vectorsStream.GetFilePointer());
                        this.vectorsStream.WriteVInt32(docCount);
                        this.vectorsStream.WriteVInt32(chunkDocs);
                        this.vectorsStream.CopyBytes(vectorsStream, chunkLength);
                        docCount += chunkDocs;
                        this.numDocs += chunkDocs;
                        mergeState.CheckAbort.Work(300 * chunkDocs);
                        i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
                    }
                    else
                    {
                        // Chunk has deletions or reaches maxDoc: fall back to
                        // per-document vector copying for its live docs.
                        for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                        {
                            Fields vectors = reader.GetTermVectors(i);
                            AddAllDocVectors(vectors, mergeState);
                            ++docCount;
                            mergeState.CheckAbort.Work(300);
                        }
                    }
                }
                else
                {
                    // Mid-chunk (or docs pending locally): copy one doc at a time.
                    Fields vectors = reader.GetTermVectors(i);
                    AddAllDocVectors(vectors, mergeState);
                    ++docCount;
                    mergeState.CheckAbort.Work(300);
                    i = NextLiveDoc(i + 1, liveDocs, maxDoc);
                }
            }

            vectorsStream.Seek(vectorsStream.Length - CodecUtil.FooterLength());
            CodecUtil.CheckFooter(vectorsStream);
        }
    }
    Finish(mergeState.FieldInfos, docCount);
    return (docCount);
}
/// <summary> Test term vectors for a segment.</summary>
/// <param name="info">Metadata for the segment being checked (supplies docCount).</param>
/// <param name="reader">An open reader over that segment.</param>
/// <param name="format">Culture info used when formatting the summary message.</param>
/// <returns>
/// A <c>Status.TermVectorStatus</c> with doc/vector tallies; any failure is
/// captured in <c>status.error</c> instead of being thrown.
/// </returns>
private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    var status = new Status.TermVectorStatus();
    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: term vectors........");
        }

        for (int docID = 0; docID < info.docCount; ++docID)
        {
            if (reader.IsDeleted(docID))
            {
                continue; // only live documents are inspected
            }

            status.docCount++;
            ITermFreqVector[] vectors = reader.GetTermFreqVectors(docID);
            if (vectors != null)
            {
                status.totVectors += vectors.Length;
            }
        }

        Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) }));
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }
    return status;
}
/// <summary>
/// Search-thread body: until the stop time, repeatedly grabs the current
/// searcher, sanity-checks segment diagnostics and merged-segment warming,
/// then samples terms from the "body" field and runs term queries,
/// accumulating hit counts in totHits.
/// </summary>
public override void Run()
{
    if (VERBOSE)
    {
        Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread");
    }
    while (Environment.TickCount < stopTimeMS)
    {
        try
        {
            IndexSearcher s = outerInstance.GetCurrentSearcher();
            try
            {
                // Verify 1) IW is correctly setting
                // diagnostics, and 2) segment warming for
                // merged segments is actually happening:
                foreach (AtomicReaderContext sub in s.IndexReader.Leaves)
                {
                    SegmentReader segReader = (SegmentReader)sub.Reader;
                    IDictionary<string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics;
                    assertNotNull(diagnostics);
                    string source;
                    diagnostics.TryGetValue("source", out source);
                    assertNotNull(source);
                    if (source.Equals("merge", StringComparison.Ordinal))
                    {
                        assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo, !outerInstance.m_assertMergedSegmentsWarmed || outerInstance.warmed.ContainsKey(segReader.core));
                    }
                }
                if (s.IndexReader.NumDocs > 0)
                {
                    outerInstance.SmokeTestSearcher(s);
                    Fields fields = MultiFields.GetFields(s.IndexReader);
                    if (fields == null)
                    {
                        continue;
                    }
                    Terms terms = fields.GetTerms("body");
                    if (terms == null)
                    {
                        continue;
                    }
                    TermsEnum termsEnum = terms.GetIterator(null);
                    int seenTermCount = 0;
                    int shift;
                    int trigger;
                    // Pick a sampling stride so roughly 30 of the known terms
                    // get searched per pass.
                    if (totTermCount.Get() < 30)
                    {
                        shift = 0;
                        trigger = 1;
                    }
                    else
                    {
                        trigger = totTermCount.Get() / 30;
                        shift = Random.Next(trigger);
                    }
                    while (Environment.TickCount < stopTimeMS)
                    {
                        BytesRef term = termsEnum.Next();
                        if (term == null)
                        {
                            // Exhausted the enum: remember how many terms exist
                            // so the next pass can size its stride.
                            totTermCount.Set(seenTermCount);
                            break;
                        }
                        seenTermCount++;
                        // search 30 terms
                        if ((seenTermCount + shift) % trigger == 0)
                        {
                            //if (VERBOSE) {
                            //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString());
                            //}
                            totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", term))));
                        }
                    }
                    //if (VERBOSE) {
                    //System.out.println(Thread.currentThread().getName() + ": search done");
                    //}
                }
            }
            finally
            {
                outerInstance.ReleaseSearcher(s);
            }
        }
        catch (Exception t)
        {
            // Mark the whole stress run failed, then rethrow so the thread dies loudly.
            Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
            outerInstance.m_failed.Set(true);
            Console.WriteLine(t.ToString());
            throw new Exception(t.ToString(), t);
        }
    }
}
/* FIXME if we want to support non-contiguous segment merges */
/// <summary>
/// Commits a finished merge: carries over deletes flushed since the merge
/// started, swaps the single merged segment into segmentInfos in place of its
/// contiguous sources, and checkpoints. Returns false (doing nothing further)
/// when the merge was aborted.
/// </summary>
private bool CommitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount, SegmentReader mergedReader)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startCommitMerge"));

        if (hitOOM)
        {
            throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete merge");
        }

        if (infoStream != null)
            Message("commitMerge: " + merge.SegString(directory) + " index=" + SegString());

        System.Diagnostics.Debug.Assert(merge.registerDone);

        // If merge was explicitly aborted, or, if rollback() or
        // rollbackTransaction() had been called since our merge
        // started (which results in an unqualified
        // deleter.refresh() call that will remove any index
        // file that current segments does not reference), we
        // abort this merge
        if (merge.IsAborted())
        {
            if (infoStream != null)
                Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");

            return false;
        }

        int start = EnsureContiguousMerge(merge);

        // Carry over any deletes flushed since the merge started, then remap
        // buffered deletes in the doc writer onto the new doc IDs.
        CommitMergedDeletes(merge, mergedReader);
        docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount);

        // If the doc store we are using has been closed and
        // is in now compound format (but wasn't when we
        // started), then we will switch to the compound
        // format as well:
        SetMergeDocStoreIsCompoundFile(merge);

        merge.info.HasProx = merger.HasProx();

        // Replace the contiguous source segments with the single merged segment.
        segmentInfos.RemoveRange(start, start + merge.segments.Count - start);
        System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info));
        segmentInfos.Insert(start, merge.info);

        CloseMergeReaders(merge, false);

        // Must note the change to segmentInfos so any commits
        // in-flight don't lose it:
        Checkpoint();

        // If the merged segments had pending changes, clear
        // them so that they don't bother writing them to
        // disk, updating SegmentInfo, etc.:
        readerPool.Clear(merge.segments);

        if (merge.optimize)
        {
            // cascade the optimize:
            segmentsToOptimize.Add(merge.info);
        }
        return true;
    }
}
/// <summary>
/// Builds a read-only view for a reopen: forwards straight to the base
/// DirectoryReader constructor with the readOnly flag fixed to true.
/// </summary>
internal ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts,
                                 System.Collections.Generic.IDictionary<string, byte[]> oldNormsCache, bool doClone,
                                 int termInfosIndexDivisor)
    : base(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor)
{
}
/// <summary> Release the segment reader (i.e. decRef it and close if there
/// are no more references.
/// </summary>
/// <param name="sr">The reader being released.</param>
/// <param name="drop">When true, discard (do not save) any pending changes on the reader.</param>
/// <throws> IOException </throws>
public virtual void Release(SegmentReader sr, bool drop)
{
    lock (this)
    {
        bool pooled = readerMap.ContainsKey(sr.SegmentInfo);

        // If the segment is pooled, the pooled entry must be this exact reader.
        System.Diagnostics.Debug.Assert(!pooled || readerMap[sr.SegmentInfo] == sr);

        // Drop caller's ref; for an external reader (not
        // pooled), this decRef will close it
        sr.DecRef();

        if (pooled && (drop || (!Enclosing_Instance.poolReaders && sr.RefCount == 1)))
        {
            // We invoke deleter.checkpoint below, so we must be
            // sync'd on IW if there are changes:

            // TODO: Java 1.5 has this, .NET can't.
            // System.Diagnostics.Debug.Assert(!sr.hasChanges || Thread.holdsLock(enclosingInstance));

            // Discard (don't save) changes when we are dropping
            // the reader; this is used only on the sub-readers
            // after a successful merge.
            sr.hasChanges &= !drop;

            // Capture before Close(), which may reset the flag.
            bool hasChanges = sr.hasChanges;

            // Drop our ref -- this will commit any pending
            // changes to the dir
            sr.Close();

            // We are the last ref to this reader; since we're
            // not pooling readers, we release it:
            readerMap.Remove(sr.SegmentInfo);

            if (hasChanges)
            {
                // Must checkpoint w/ deleter, because this
                // segment reader will have created new _X_N.del
                // file.
                enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
            }
        }
    }
}
/// <summary>
/// Search-thread body: until the stop time, repeatedly grabs the current
/// searcher, sanity-checks segment diagnostics and merged-segment warming,
/// then samples terms from the "body" field and runs term queries,
/// accumulating hit counts in totHits.
/// </summary>
public override void Run()
{
    if (Verbose)
    {
        Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread");
    }
    while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTimeMS) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    {
        try
        {
            IndexSearcher s = outerInstance.GetCurrentSearcher();
            try
            {
                // Verify 1) IW is correctly setting
                // diagnostics, and 2) segment warming for
                // merged segments is actually happening:
                foreach (AtomicReaderContext sub in s.IndexReader.Leaves)
                {
                    SegmentReader segReader = (SegmentReader)sub.Reader;
                    IDictionary<string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics;
                    assertNotNull(diagnostics);
                    diagnostics.TryGetValue("source", out string source);
                    assertNotNull(source);
                    if (source.Equals("merge", StringComparison.Ordinal))
                    {
                        assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo,
                            // LUCENENET: ConditionalWeakTable doesn't have ContainsKey, so we normalize to TryGetValue
                            !outerInstance.m_assertMergedSegmentsWarmed || outerInstance.warmed.TryGetValue(segReader.core, out BooleanRef _));
                    }
                }
                if (s.IndexReader.NumDocs > 0)
                {
                    outerInstance.SmokeTestSearcher(s);
                    Fields fields = MultiFields.GetFields(s.IndexReader);
                    if (fields == null)
                    {
                        continue;
                    }
                    Terms terms = fields.GetTerms("body");
                    if (terms == null)
                    {
                        continue;
                    }
                    TermsEnum termsEnum = terms.GetEnumerator();
                    int seenTermCount = 0;
                    int shift;
                    int trigger;
                    // Pick a sampling stride so roughly 30 of the known terms
                    // get searched per pass.
                    if (totTermCount < 30)
                    {
                        shift = 0;
                        trigger = 1;
                    }
                    else
                    {
                        trigger = totTermCount / 30;
                        shift = Random.Next(trigger);
                    }
                    while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTimeMS) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                    {
                        if (!termsEnum.MoveNext())
                        {
                            // Exhausted the enum: remember how many terms exist
                            // so the next pass can size its stride.
                            totTermCount.Value = seenTermCount;
                            break;
                        }
                        seenTermCount++;
                        // search 30 terms
                        if ((seenTermCount + shift) % trigger == 0)
                        {
                            //if (VERBOSE) {
                            //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString());
                            //}
                            totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", termsEnum.Term))));
                        }
                    }
                    //if (VERBOSE) {
                    //System.out.println(Thread.currentThread().getName() + ": search done");
                    //}
                }
            }
            finally
            {
                outerInstance.ReleaseSearcher(s);
            }
        }
        catch (Exception t) when (t.IsThrowable())
        {
            // Mark the whole stress run failed, then rethrow so the thread dies loudly.
            Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
            outerInstance.m_failed.Value = (true);
            Console.WriteLine(t.ToString());
            throw RuntimeException.Create(t);
        }
    }
}