public virtual void TestExpungeDeletes()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED));
    Field id = new Field("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(id);
    id.SetValue("0");
    w.AddDocument(doc);
    id.SetValue("1");
    w.AddDocument(doc);
    w.DeleteDocuments(new Term("id", "0"));

    IndexReader r = w.GetReader();
    w.ExpungeDeletes();
    w.Close();
    r.Close();

    r = IndexReader.Open(dir);
    Assert.AreEqual(1, r.NumDocs());
    Assert.IsFalse(r.HasDeletions());
    r.Close();
    dir.Close();
}
/// <summary>Add an IndexReader whose stored fields will not be returned.  This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws> IllegalArgumentException if not all indexes contain the same number
/// of documents
/// </throws>
/// <throws> IllegalArgumentException if not all indexes have the same value
/// of {@link IndexReader#MaxDoc()}
/// </throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc) // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    if (reader.NumDocs() != numDocs)
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());

    System.Collections.IEnumerator i = reader.GetFieldNames(IndexReader.FieldOption.ALL).GetEnumerator();
    while (i.MoveNext())
    {
        System.Collections.DictionaryEntry fi = (System.Collections.DictionaryEntry) i.Current;

        // update fieldToReader map
        System.String field = fi.Key.ToString();
        if (fieldToReader[field] == null)
            fieldToReader[field] = reader;
    }

    if (!ignoreStoredFields)
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    readers.Add(reader);
}
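For reference, a minimal usage sketch of this Add overload (not part of the source above). It assumes two parallel indexes that were built with identical document numbering, a 2.9-era method-style Lucene.NET API (FSDirectory.Open, IndexReader.Open, HasDeletions()), and purely illustrative directory names.

// Sketch only: the directory paths below are illustrative assumptions, not real paths.
Directory mainDir = FSDirectory.Open(new System.IO.DirectoryInfo("index-main"));
Directory auxDir = FSDirectory.Open(new System.IO.DirectoryInfo("index-aux"));

ParallelReader parallel = new ParallelReader();
parallel.Add(IndexReader.Open(mainDir));        // stored fields are served from this reader
parallel.Add(IndexReader.Open(auxDir), true);   // ignoreStoredFields: contributes indexed fields only

// Per the Add() implementation above, maxDoc/numDocs/hasDeletions are taken from the first reader added.
bool anyDeletions = parallel.HasDeletions();
parallel.Close();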
private void MergeNorms()
{
    byte[] normBuffer = null;
    IndexOutput output = null;
    try
    {
        for (int i = 0; i < fieldInfos.Size(); i++)
        {
            FieldInfo fi = fieldInfos.FieldInfo(i);
            if (fi.isIndexed && !fi.omitNorms)
            {
                if (output == null)
                {
                    output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
                    output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
                }
                for (int j = 0; j < readers.Count; j++)
                {
                    IndexReader reader = (IndexReader) readers[j];
                    int maxDoc = reader.MaxDoc();
                    if (normBuffer == null || normBuffer.Length < maxDoc)
                    {
                        // the buffer is too small for the current segment
                        normBuffer = new byte[maxDoc];
                    }
                    reader.Norms(fi.name, normBuffer, 0);
                    if (!reader.HasDeletions())
                    {
                        // optimized case for segments without deleted docs
                        output.WriteBytes(normBuffer, maxDoc);
                    }
                    else
                    {
                        // this segment has deleted docs, so we have to
                        // check for every doc if it is deleted or not
                        for (int k = 0; k < maxDoc; k++)
                        {
                            if (!reader.IsDeleted(k))
                            {
                                output.WriteByte(normBuffer[k]);
                            }
                        }
                    }
                    if (checkAbort != null)
                    {
                        checkAbort.Work(maxDoc);
                    }
                }
            }
        }
    }
    finally
    {
        if (output != null)
        {
            output.Close();
        }
    }
}
// maps around deleted docs
internal int[] GetDocMap()
{
    if (docMap == null)
    {
        // build array which maps document numbers around deletions
        if (reader.HasDeletions())
        {
            int maxDoc = reader.MaxDoc();
            docMap = new int[maxDoc];
            int j = 0;
            for (int i = 0; i < maxDoc; i++)
            {
                if (reader.IsDeleted(i))
                {
                    docMap[i] = -1;
                }
                else
                {
                    docMap[i] = j++;
                }
            }
        }
    }
    return docMap;
}
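As an illustration of the same doc-map idea outside of Lucene's internals, here is a self-contained sketch; the names BuildDocMap and deleted are hypothetical, not Lucene API. Deleted slots map to -1 and surviving slots are renumbered consecutively, which is exactly how doc IDs shift when deletions are squeezed out during a merge.

// Hypothetical helper: build a remapping table from a deletion bitmap.
internal static int[] BuildDocMap(bool[] deleted)
{
    int[] docMap = new int[deleted.Length];
    int next = 0;
    for (int i = 0; i < deleted.Length; i++)
    {
        // deleted docs get -1; live docs get the next compacted doc ID
        docMap[i] = deleted[i] ? -1 : next++;
    }
    return docMap;
}

// Example: deletions at old doc IDs 1 and 3.
// BuildDocMap(new[] { false, true, false, true, false }) => { 0, -1, 1, -1, 2 }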
/// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
/// <throws> IOException </throws>
private void MergeVectors()
{
    TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);
    try
    {
        int idx = 0;
        for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
        {
            SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
            TermVectorsReader matchingVectorsReader = null;
            if (matchingSegmentReader != null)
            {
                TermVectorsReader vectorsReader = matchingSegmentReader.GetTermVectorsReaderOrig();

                // If the TV* files are an older format then they cannot read raw docs:
                if (vectorsReader != null && vectorsReader.CanReadRawDocs())
                {
                    matchingVectorsReader = vectorsReader;
                }
            }
            IndexReader reader = (IndexReader) iter.Current;
            if (reader.HasDeletions())
            {
                CopyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
            }
            else
            {
                CopyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
            }
        }
    }
    finally
    {
        termVectorsWriter.Close();
    }

    System.String fileName = segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
    long tvxSize = directory.FileLength(fileName);

    if (4 + ((long) mergedDocs) * 16 != tvxSize)
    {
        // This is most likely a bug in Sun JRE 1.6.0_04/_05;
        // we detect that the bug has struck, here, and
        // throw an exception to prevent the corruption from
        // entering the index.  See LUCENE-1282 for details.
        throw new System.SystemException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
    }
}
/// <summary>Add an IndexReader whose stored fields will not be returned.  This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws> IllegalArgumentException if not all indexes contain the same number
/// of documents
/// </throws>
/// <throws> IllegalArgumentException if not all indexes have the same value
/// of {@link IndexReader#MaxDoc()}
/// </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    EnsureOpen();
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc)
    {
        // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    }
    if (reader.NumDocs() != numDocs)
    {
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
    }

    ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    IEnumerator<string> i = fields.GetEnumerator();
    while (i.MoveNext())
    {
        // update fieldToReader map
        string field = i.Current;
        if (!fieldToReader.ContainsKey(field))
        {
            fieldToReader[field] = reader;
        }
    }

    if (!ignoreStoredFields)
    {
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    }
    readers.Add(reader);

    if (incRefReaders)
    {
        reader.IncRef();
    }
    decrefOnClose.Add(incRefReaders);
}
} // doIndex

private static void removeAllDuplicateAndDeletedFiles(IndexableFileInfo[] fileInfos, string LuceneIndexDir, IndexCreationMode indexCreationMode)
{
    if (indexCreationMode != IndexCreationMode.AppendToExistingIndex)
    {
        return;
    }

    IndexReader reader = IndexReader.Open(LuceneIndexDir);
    try
    {
        int numDocs = reader.NumDocs();
        for (int i = 0; i < numDocs; i++)
        {
            Document docToCheck = reader.Document(i);
            bool removeDocFromIndex = true;
            string filenameField = docToCheck.GetField("filename").StringValue();
            string lastModified = docToCheck.GetField("LastModified").StringValue();

            // keep the doc only if a file with the same name and modification time still exists
            foreach (IndexableFileInfo fi in fileInfos)
            {
                if (String.Compare(fi.Filename, filenameField, true) == 0 && DateTools.DateToString(fi.LastModified, DateTools.Resolution.SECOND) == lastModified)
                {
                    removeDocFromIndex = false;
                    break;
                }
            } // foreach

            if (removeDocFromIndex)
            {
                reader.DeleteDocument(i);
                if (!reader.HasDeletions())
                {
                    throw new Exception("error: deletion failed!!");
                }
            }
        } // for each lucene doc
    }
    finally
    {
        reader.Close();
    }

    LuceneIndexer indexer = new LuceneIndexer(LuceneIndexDir, indexCreationMode); // open up the index again
    indexer.CloseIndexWriter(OptimizeMode.DoOptimization); // just to optimize the index (which removes deleted items).
}
/// <summary>Add an IndexReader whose stored fields will not be returned.  This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws> IllegalArgumentException if not all indexes contain the same number
/// of documents
/// </throws>
/// <throws> IllegalArgumentException if not all indexes have the same value
/// of {@link IndexReader#MaxDoc()}
/// </throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc)
    {
        // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    }
    if (reader.NumDocs() != numDocs)
    {
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
    }

    System.Collections.ICollection fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    System.Collections.IEnumerator i = fields.GetEnumerator();
    while (i.MoveNext())
    {
        System.Collections.DictionaryEntry fi = (System.Collections.DictionaryEntry) i.Current;

        // update fieldToReader map
        System.String field = fi.Key.ToString();
        if (fieldToReader[field] == null)
        {
            fieldToReader[field] = reader;
        }
    }

    if (!ignoreStoredFields)
    {
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    }
    readers.Add(reader);
}
/// <summary>Add an IndexReader whose stored fields will not be returned.  This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws> IllegalArgumentException if not all indexes contain the same number
/// of documents
/// </throws>
/// <throws> IllegalArgumentException if not all indexes have the same value
/// of {@link IndexReader#MaxDoc()}
/// </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    EnsureOpen();
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc) // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    if (reader.NumDocs() != numDocs)
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());

    System.Collections.Generic.ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    System.Collections.IEnumerator i = fields.GetEnumerator();
    while (i.MoveNext())
    {
        // update fieldToReader map
        System.String field = (System.String) i.Current;
        if (fieldToReader[field] == null)
            fieldToReader[field] = reader;
    }

    if (!ignoreStoredFields)
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    readers.Add(reader);

    if (incRefReaders)
    {
        reader.IncRef();
    }
    decrefOnClose.Add(incRefReaders);
}
public override bool HasDeletions()
{
    // Don't call ensureOpen() here (it could affect performance)
    return in_Renamed.HasDeletions();
}
/// <summary> </summary>
/// <returns> The number of documents in all of the readers
/// </returns>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
private int MergeFields()
{
    if (!mergeDocStores)
    {
        // When we are not merging by doc stores, that means
        // all segments were written as part of a single
        // autoCommit=false IndexWriter session, so their field
        // name -> number mappings are the same.  So, we start
        // with the fieldInfos of the last segment in this
        // case, to keep that numbering.
        SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
        fieldInfos = (FieldInfos) sr.core.fieldInfos.Clone();
    }
    else
    {
        fieldInfos = new FieldInfos(); // merge field names
    }

    for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
    {
        IndexReader reader = (IndexReader) iter.Current;
        if (reader is SegmentReader)
        {
            SegmentReader segmentReader = (SegmentReader) reader;
            FieldInfos readerFieldInfos = segmentReader.FieldInfos();
            int numReaderFieldInfos = readerFieldInfos.Size();
            for (int j = 0; j < numReaderFieldInfos; j++)
            {
                FieldInfo fi = readerFieldInfos.FieldInfo(j);
                fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
            }
        }
        else
        {
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
            fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
        }
    }
    fieldInfos.Write(directory, segment + ".fnm");

    int docCount = 0;

    SetMatchingSegmentReaders();

    if (mergeDocStores)
    {
        // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
        // in merge mode, we use this FieldSelector
        FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

        // merge field values
        FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

        try
        {
            int idx = 0;
            for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
            {
                IndexReader reader = (IndexReader) iter.Current;
                SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
                FieldsReader matchingFieldsReader = null;
                if (matchingSegmentReader != null)
                {
                    FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                    if (fieldsReader != null && fieldsReader.CanReadRawDocs())
                    {
                        matchingFieldsReader = fieldsReader;
                    }
                }
                if (reader.HasDeletions())
                {
                    docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
                }
                else
                {
                    docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
                }
            }
        }
        finally
        {
            fieldsWriter.Close();
        }

        System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
        long fdxFileLength = directory.FileLength(fileName);

        if (4 + ((long) docCount) * 8 != fdxFileLength)
        {
            // This is most likely a bug in Sun JRE 1.6.0_04/_05;
            // we detect that the bug has struck, here, and
            // throw an exception to prevent the corruption from
            // entering the index.  See LUCENE-1282 for details.
            throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
        }
    }
    else
    {
        // If we are skipping the doc stores, that means there
        // are no deletions in any of these segments, so we
        // just sum numDocs() of each segment to get total docCount
        for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
        {
            docCount += ((IndexReader) iter.Current).NumDocs();
        }
    }

    return docCount;
}
public static void AssertIndexEquals(IndexReader index1, IndexReader index2)
{
    Assert.AreEqual(index1.NumDocs(), index2.NumDocs(), "IndexReaders have different values for numDocs.");
    Assert.AreEqual(index1.MaxDoc(), index2.MaxDoc(), "IndexReaders have different values for maxDoc.");
    Assert.AreEqual(index1.HasDeletions(), index2.HasDeletions(), "Only one IndexReader has deletions.");
    Assert.AreEqual(index1.IsOptimized(), index2.IsOptimized(), "Only one index is optimized.");

    // check field names
    System.Collections.ICollection fields1 = index1.GetFieldNames(FieldOption.ALL);
    System.Collections.ICollection fields2 = index2.GetFieldNames(FieldOption.ALL);
    Assert.AreEqual(fields1.Count, fields2.Count, "IndexReaders have different numbers of fields.");
    System.Collections.IEnumerator it1 = fields1.GetEnumerator();
    System.Collections.IEnumerator it2 = fields2.GetEnumerator();
    while (it1.MoveNext())
    {
        Assert.IsTrue(it2.MoveNext());
        Assert.AreEqual((System.String) it1.Current, (System.String) it2.Current, "Different field names.");
    }

    // check norms
    it1 = fields1.GetEnumerator();
    while (it1.MoveNext())
    {
        System.String curField = (System.String) it1.Current;
        byte[] norms1 = index1.Norms(curField);
        byte[] norms2 = index2.Norms(curField);
        Assert.AreEqual(norms1.Length, norms2.Length);
        for (int i = 0; i < norms1.Length; i++)
        {
            Assert.AreEqual(norms1[i], norms2[i], "Norm different for doc " + i + " and field '" + curField + "'.");
        }
    }

    // check deletions
    for (int i = 0; i < index1.MaxDoc(); i++)
    {
        Assert.AreEqual(index1.IsDeleted(i), index2.IsDeleted(i), "Doc " + i + " only deleted in one index.");
    }

    // check stored fields
    for (int i = 0; i < index1.MaxDoc(); i++)
    {
        if (!index1.IsDeleted(i))
        {
            Document doc1 = index1.Document(i);
            Document doc2 = index2.Document(i);
            fields1 = doc1.GetFields();
            fields2 = doc2.GetFields();
            Assert.AreEqual(fields1.Count, fields2.Count, "Different numbers of fields for doc " + i + ".");
            it1 = fields1.GetEnumerator();
            it2 = fields2.GetEnumerator();
            while (it1.MoveNext())
            {
                Assert.IsTrue(it2.MoveNext());
                Field curField1 = (Field) it1.Current;
                Field curField2 = (Field) it2.Current;
                Assert.AreEqual(curField1.Name(), curField2.Name(), "Different field names for doc " + i + ".");
                Assert.AreEqual(curField1.StringValue(), curField2.StringValue(), "Different field values for doc " + i + ".");
            }
        }
    }

    // check dictionary and posting lists
    TermEnum enum1 = index1.Terms();
    TermEnum enum2 = index2.Terms();
    TermPositions tp1 = index1.TermPositions();
    TermPositions tp2 = index2.TermPositions();
    while (enum1.Next())
    {
        Assert.IsTrue(enum2.Next());
        Assert.AreEqual(enum1.Term(), enum2.Term(), "Different term in dictionary.");
        tp1.Seek(enum1.Term());
        tp2.Seek(enum1.Term());
        while (tp1.Next())
        {
            Assert.IsTrue(tp2.Next());
            Assert.AreEqual(tp1.Doc(), tp2.Doc(), "Different doc id in posting list of term " + enum1.Term() + ".");
            Assert.AreEqual(tp1.Freq(), tp2.Freq(), "Different term frequency in posting list of term " + enum1.Term() + ".");
            for (int i = 0; i < tp1.Freq(); i++)
            {
                Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in posting list of term " + enum1.Term() + ".");
            }
        }
    }
}
public override bool HasDeletions()
{
    return in_Renamed.HasDeletions();
}
/// <summary>
/// Returns a single <seealso cref="Bits"/> instance for this
/// reader, merging live Documents on the
/// fly. This method will return null if the reader
/// has no deletions.
///
/// <p><b>NOTE</b>: this is a very slow way to access live docs.
/// For example, each Bits access will require a binary search.
/// It's better to get the sub-readers and iterate through them
/// yourself.
/// </summary>
public static Bits GetLiveDocs(IndexReader reader)
{
    if (reader.HasDeletions())
    {
        IList<AtomicReaderContext> leaves = reader.Leaves();
        int size = leaves.Count;
        Debug.Assert(size > 0, "A reader with deletions must have at least one leaf");
        if (size == 1)
        {
            return leaves[0].AtomicReader.LiveDocs;
        }
        Bits[] liveDocs = new Bits[size];
        int[] starts = new int[size + 1];
        for (int i = 0; i < size; i++)
        {
            // record all liveDocs, even if they are null
            AtomicReaderContext ctx = leaves[i];
            liveDocs[i] = ctx.AtomicReader.LiveDocs;
            starts[i] = ctx.DocBase;
        }
        starts[size] = reader.MaxDoc();
        return new MultiBits(liveDocs, starts, true);
    }
    else
    {
        return null;
    }
}
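A usage sketch for this helper (illustrative, not from the source above): it assumes the method is exposed as MultiFields.GetLiveDocs, that the Bits interface offers Get(int), and that a null result means the reader has no deletions so every doc ID is live.

// Sketch only: MultiFields.GetLiveDocs and Bits.Get(int) are assumed names for this port.
Bits liveDocs = MultiFields.GetLiveDocs(reader);
for (int docId = 0; docId < reader.MaxDoc(); docId++)
{
    if (liveDocs == null || liveDocs.Get(docId))
    {
        // docId is live; safe to load its stored fields, term vectors, etc.
    }
}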
/// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
/// <throws> IOException </throws>
private void MergeVectors()
{
    TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);

    try
    {
        for (int r = 0; r < readers.Count; r++)
        {
            SegmentReader matchingSegmentReader = matchingSegmentReaders[r];
            TermVectorsReader matchingVectorsReader;
            bool hasMatchingReader;
            if (matchingSegmentReader != null)
            {
                matchingVectorsReader = matchingSegmentReader.termVectorsReaderOrig;

                // If the TV* files are an older format then they
                // cannot read raw docs:
                if (matchingVectorsReader != null && !matchingVectorsReader.CanReadRawDocs())
                {
                    matchingVectorsReader = null;
                    hasMatchingReader = false;
                }
                else
                {
                    hasMatchingReader = matchingVectorsReader != null;
                }
            }
            else
            {
                hasMatchingReader = false;
                matchingVectorsReader = null;
            }
            IndexReader reader = (IndexReader) readers[r];
            bool hasDeletions = reader.HasDeletions();
            int maxDoc = reader.MaxDoc();
            for (int docNum = 0; docNum < maxDoc;)
            {
                // skip deleted docs
                if (!hasDeletions || !reader.IsDeleted(docNum))
                {
                    if (hasMatchingReader)
                    {
                        // We can optimize this case (doing a bulk
                        // byte copy) since the field numbers are
                        // identical
                        int start = docNum;
                        int numDocs = 0;
                        do
                        {
                            docNum++;
                            numDocs++;
                            if (docNum >= maxDoc)
                            {
                                break;
                            }
                            if (hasDeletions && matchingSegmentReader.IsDeleted(docNum))
                            {
                                docNum++;
                                break;
                            }
                        }
                        while (numDocs < MAX_RAW_MERGE_DOCS);

                        matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
                        termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
                        if (checkAbort != null)
                        {
                            checkAbort.Work(300 * numDocs);
                        }
                    }
                    else
                    {
                        // NOTE: it's very important to first assign
                        // to vectors then pass it to
                        // termVectorsWriter.addAllDocVectors; see
                        // LUCENE-1282
                        TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
                        termVectorsWriter.AddAllDocVectors(vectors);
                        docNum++;
                        if (checkAbort != null)
                        {
                            checkAbort.Work(300);
                        }
                    }
                }
                else
                {
                    docNum++;
                }
            }
        }
    }
    finally
    {
        termVectorsWriter.Close();
    }

    long tvxSize = directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);

    // {{dougsale-2.4.0}
    // this shouldn't be a problem for us - if it is,
    // then it's not a JRE bug
    //if (4 + mergedDocs * 16 != tvxSize)
    //    // This is most likely a bug in Sun JRE 1.6.0_04/_05;
    //    // we detect that the bug has struck, here, and
    //    // throw an exception to prevent the corruption from
    //    // entering the index.  See LUCENE-1282 for details.
    //    throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + "; now aborting this merge to prevent index corruption");
}
/// <summary> </summary>
/// <returns> The number of documents in all of the readers
/// </returns>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
private int MergeFields()
{
    if (!mergeDocStores)
    {
        // When we are not merging by doc stores, that means
        // all segments were written as part of a single
        // autoCommit=false IndexWriter session, so their field
        // name -> number mappings are the same.  So, we start
        // with the fieldInfos of the last segment in this
        // case, to keep that numbering.
        SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
        fieldInfos = (FieldInfos) sr.fieldInfos.Clone();
    }
    else
    {
        fieldInfos = new FieldInfos(); // merge field names
    }

    for (int i = 0; i < readers.Count; i++)
    {
        IndexReader reader = (IndexReader) readers[i];
        if (reader is SegmentReader)
        {
            SegmentReader segmentReader = (SegmentReader) reader;
            for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
            {
                FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
                fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTf);
            }
        }
        else
        {
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false);
            AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false);
            fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
        }
    }
    fieldInfos.Write(directory, segment + ".fnm");

    int docCount = 0;

    SetMatchingSegmentReaders();

    if (mergeDocStores)
    {
        // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
        // in merge mode, we use this FieldSelector
        FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

        // merge field values
        FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

        try
        {
            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader) readers[i];
                SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
                FieldsReader matchingFieldsReader;
                bool hasMatchingReader;
                if (matchingSegmentReader != null)
                {
                    FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                    if (fieldsReader != null && !fieldsReader.CanReadRawDocs())
                    {
                        matchingFieldsReader = null;
                        hasMatchingReader = false;
                    }
                    else
                    {
                        matchingFieldsReader = fieldsReader;
                        hasMatchingReader = true;
                    }
                }
                else
                {
                    hasMatchingReader = false;
                    matchingFieldsReader = null;
                }
                int maxDoc = reader.MaxDoc();
                bool hasDeletions = reader.HasDeletions();
                for (int j = 0; j < maxDoc;)
                {
                    if (!hasDeletions || !reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        if (hasMatchingReader)
                        {
                            // We can optimize this case (doing a bulk
                            // byte copy) since the field numbers are
                            // identical
                            int start = j;
                            int numDocs = 0;
                            do
                            {
                                j++;
                                numDocs++;
                                if (j >= maxDoc)
                                {
                                    break;
                                }
                                if (hasDeletions && matchingSegmentReader.IsDeleted(j))
                                {
                                    j++;
                                    break;
                                }
                            }
                            while (numDocs < MAX_RAW_MERGE_DOCS);

                            IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                            fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                            docCount += numDocs;
                            if (checkAbort != null)
                            {
                                checkAbort.Work(300 * numDocs);
                            }
                        }
                        else
                        {
                            // NOTE: it's very important to first assign
                            // to doc then pass it to
                            // termVectorsWriter.addAllDocVectors; see
                            // LUCENE-1282
                            Document doc = reader.Document(j, fieldSelectorMerge);
                            fieldsWriter.AddDocument(doc);
                            j++;
                            docCount++;
                            if (checkAbort != null)
                            {
                                checkAbort.Work(300);
                            }
                        }
                    }
                    else
                    {
                        j++;
                    }
                }
            }
        }
        finally
        {
            fieldsWriter.Close();
        }

        long fdxFileLength = directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

        // {{dougsale-2.4.0}
        // this shouldn't be a problem for us - if it is,
        // then it's not a JRE bug...
        //if (4 + docCount * 8 != fdxFileLength)
        //    // This is most likely a bug in Sun JRE 1.6.0_04/_05;
        //    // we detect that the bug has struck, here, and
        //    // throw an exception to prevent the corruption from
        //    // entering the index.  See LUCENE-1282 for details.
        //    throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + "; now aborting this merge to prevent index corruption");
    }
    else
    {
        // If we are skipping the doc stores, that means there
        // are no deletions in any of these segments, so we
        // just sum numDocs() of each segment to get total docCount
        for (int i = 0; i < readers.Count; i++)
        {
            docCount += ((IndexReader) readers[i]).NumDocs();
        }
    }

    return docCount;
}