public void CloseDocStore(SegmentWriteState state) { lock (this) { int inc = state.numDocsInStore - lastDocID; if (inc > 0) { InitFieldsWriter(); Fill(state.numDocsInStore - docWriter.GetDocStoreOffset()); } if (fieldsWriter != null) { fieldsWriter.Close(); fieldsWriter = null; lastDocID = 0; System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null); SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION); SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION); state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION; if (4 + ((long)state.numDocsInStore) * 8 != state.directory.FileLength(fileName)) { throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName)); } } } }
internal override void closeDocStore(DocumentsWriter.FlushState state) { lock (this) { int inc = state.numDocsInStore - lastDocID; if (inc > 0) { initFieldsWriter(); fill(state.numDocsInStore - docWriter.GetDocStoreOffset()); } if (fieldsWriter != null) { fieldsWriter.Close(); fieldsWriter = null; lastDocID = 0; System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null); string fdtFile = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION; string fdxFile = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION; state.flushedFiles[fdtFile] = fdtFile; state.flushedFiles[fdxFile] = fdxFile; state.docWriter.RemoveOpenFile(fdtFile); state.docWriter.RemoveOpenFile(fdxFile); if (4 + state.numDocsInStore * 8 != state.directory.FileLength(fdxFile)) { throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fdxFile) + " length in bytes of " + fdxFile); } } } }
public void CloseDocStore(SegmentWriteState state) { lock (this) { int inc = state.numDocsInStore - lastDocID; if (inc > 0) { InitFieldsWriter(); Fill(state.numDocsInStore - docWriter.GetDocStoreOffset()); } if (fieldsWriter != null) { fieldsWriter.Close(); fieldsWriter = null; lastDocID = 0; System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null); SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION); SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION); state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION; if (4 + ((long) state.numDocsInStore) * 8 != state.directory.FileLength(fileName)) throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName)); } } }
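// Illustrative sketch, not part of Lucene.NET: the CloseDocStore variants above all validate the
// stored-fields index (.fdx) as a 4-byte header followed by one 8-byte file pointer per stored
// document. The arithmetic behind that "fdx size mismatch" guard, stated on its own and kept in
// long math to avoid int overflow (type and member names here are invented for the sketch):
internal static class FdxSizeCheckSketch
{
    // Expected .fdx length for a given number of documents in the doc store.
    public static long ExpectedFdxLength(int numDocsInStore)
    {
        return 4L + (long) numDocsInStore * 8L; // header + one long pointer per doc
    }

    // Mirrors the check above: true when the observed file length matches the expectation.
    public static bool Matches(int numDocsInStore, long observedFdxLength)
    {
        return ExpectedFdxLength(numDocsInStore) == observedFdxLength;
    }
}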
private int CopyFieldsNoDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) { int maxDoc = reader.MaxDoc; int docCount = 0; if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" while (docCount < maxDoc) { int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len); fieldsWriter.AddRawDocuments(stream, rawDocLengths, len); docCount += len; checkAbort.Work(300 * len); } } else { for (; docCount < maxDoc; docCount++) { // NOTE: it's very important to first assign to doc then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Document doc = reader.Document(docCount); fieldsWriter.AddDocument(doc); checkAbort.Work(300); } } return(docCount); }
private int CopyFieldsWithDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) { int docCount = 0; int maxDoc = reader.MaxDoc; if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" for (int j = 0; j < maxDoc;) { if (reader.IsDeleted(j)) { // skip deleted docs ++j; continue; } // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { j++; numDocs++; if (j >= maxDoc) { break; } if (reader.IsDeleted(j)) { j++; break; } }while (numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; checkAbort.Work(300 * numDocs); } } else { for (int j = 0; j < maxDoc; j++) { if (reader.IsDeleted(j)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to doc then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Document doc = reader.Document(j); fieldsWriter.AddDocument(doc); docCount++; checkAbort.Work(300); } } return(docCount); }
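// Illustrative sketch: the bulk-copy branch of CopyFieldsWithDeletions above gathers a contiguous
// run of live documents, capped at MAX_RAW_MERGE_DOCS, so the run can be copied as raw bytes in a
// single AddRawDocuments call. The same scan in isolation, assuming a plain bool[] stands in for
// IndexReader.IsDeleted and 4192 is only a placeholder for SegmentMerger's real constant:
internal static class LiveDocRunSketch
{
    private const int MaxRawMergeDocs = 4192; // placeholder for SegmentMerger.MAX_RAW_MERGE_DOCS

    // Starting at a live document 'start', counts the consecutive live documents (at most
    // MaxRawMergeDocs) and sets 'next' to the first document after the run, stepping past
    // exactly one trailing deleted document, as the do/while loop above does.
    public static int ScanRun(bool[] deleted, int start, out int next)
    {
        int j = start;
        int numDocs = 0;
        do
        {
            j++;
            numDocs++;
            if (j >= deleted.Length)
                break;
            if (deleted[j])
            {
                j++;
                break;
            }
        }
        while (numDocs < MaxRawMergeDocs);
        next = j;
        return numDocs;
    }
}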
public /*internal*/ void AddDocument(System.String segment, Document doc) { // write field names fieldInfos = new FieldInfos(); fieldInfos.Add(doc); fieldInfos.Write(directory, segment + ".fnm"); // write field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { fieldsWriter.AddDocument(doc); } finally { fieldsWriter.Close(); } // invert doc into postingTable postingTable.Clear(); // clear postingTable fieldLengths = new int[fieldInfos.Size()]; // init fieldLengths fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions fieldOffsets = new int[fieldInfos.Size()]; // init fieldOffsets fieldBoosts = new float[fieldInfos.Size()]; // init fieldBoosts float boost = doc.GetBoost(); for (int i = 0; i < fieldBoosts.Length; i++) { fieldBoosts[i] = boost; } InvertDocument(doc); // sort postingTable into an array Posting[] postings = SortPostingTable(); /* * for (int i = 0; i < postings.length; i++) { * Posting posting = postings[i]; * System.out.print(posting.term); * System.out.print(" freq=" + posting.freq); * System.out.print(" pos="); * System.out.print(posting.positions[0]); * for (int j = 1; j < posting.freq; j++) * System.out.print("," + posting.positions[j]); * System.out.println(""); * } */ // write postings WritePostings(postings, segment); // write norms of indexed fields WriteNorms(segment); }
public void AddDocument(System.String segment, Document doc) { // write field names fieldInfos = new FieldInfos(); fieldInfos.Add(doc); fieldInfos.Write(directory, segment + ".fnm"); // write field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { fieldsWriter.AddDocument(doc); } finally { fieldsWriter.Close(); } // invert doc into postingTable postingTable.Clear(); // clear postingTable fieldLengths = new int[fieldInfos.Size()]; // init fieldLengths fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions fieldOffsets = new int[fieldInfos.Size()]; // init fieldOffsets fieldBoosts = new float[fieldInfos.Size()]; // init fieldBoosts float boost = doc.GetBoost(); for (int i = 0; i < fieldBoosts.Length; i++) { fieldBoosts[i] = boost; } InvertDocument(doc); // sort postingTable into an array Posting[] postings = SortPostingTable(); /* for (int i = 0; i < postings.length; i++) { Posting posting = postings[i]; System.out.print(posting.term); System.out.print(" freq=" + posting.freq); System.out.print(" pos="); System.out.print(posting.positions[0]); for (int j = 1; j < posting.freq; j++) System.out.print("," + posting.positions[j]); System.out.println(""); } */ // write postings WritePostings(postings, segment); // write norms of indexed fields WriteNorms(segment); }
private void InitFieldsWriter() { if (fieldsWriter == null) { System.String docStoreSegment = docWriter.DocStoreSegment; if (docStoreSegment != null) { System.Diagnostics.Debug.Assert(docStoreSegment != null); fieldsWriter = new FieldsWriter(docWriter.directory, docStoreSegment, fieldInfos); docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_EXTENSION); docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); lastDocID = 0; } } }
private void InitFieldsWriter() { if (fieldsWriter == null) { System.String docStoreSegment = docWriter.GetDocStoreSegment(); if (docStoreSegment != null) { System.Diagnostics.Debug.Assert(docStoreSegment != null); fieldsWriter = new FieldsWriter(docWriter.directory, docStoreSegment, fieldInfos); docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_EXTENSION); docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); lastDocID = 0; } } }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> IOException </throws> private int MergeFields() { fieldInfos = new FieldInfos(); // merge field names int docCount = 0; for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } fieldInfos.Write(directory, segment + ".fnm"); FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; int maxDoc = reader.MaxDoc(); for (int j = 0; j < maxDoc; j++) { if (!reader.IsDeleted(j)) { // skip deleted docs fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge)); docCount++; } } } } finally { fieldsWriter.Close(); } return(docCount); }
internal void Abort() { lock (this) { if (fieldsWriter != null) { try { fieldsWriter.Close(); } catch (System.Exception) { } fieldsWriter = null; lastDocID = 0; } } }
internal override void Abort() { lock (this) { if (fieldsWriter != null) { try { fieldsWriter.Close(); } catch (System.Exception) { } fieldsWriter = null; lastDocID = 0; } } }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> IOException </throws> private int MergeFields() { fieldInfos = new FieldInfos(); // merge field names int docCount = 0; for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } fieldInfos.Write(directory, segment + ".fnm"); FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; int maxDoc = reader.MaxDoc(); for (int j = 0; j < maxDoc; j++) { if (!reader.IsDeleted(j)) { // skip deleted docs fieldsWriter.AddDocument(reader.Document(j)); docCount++; } } } } finally { fieldsWriter.Close(); } return(docCount); }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int MergeFields() { if (!mergeDocStores) { // When we are not merging by doc stores, that means // all segments were written as part of a single // autoCommit=false IndexWriter session, so their field // name -> number mapping are the same. So, we start // with the fieldInfos of the last segment in this // case, to keep that numbering. SegmentReader sr = (SegmentReader)readers[readers.Count - 1]; fieldInfos = (FieldInfos)sr.fieldInfos.Clone(); } else { fieldInfos = new FieldInfos(); // merge field names } for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader)reader; for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++) { FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j); fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTf); } } else { AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } } fieldInfos.Write(directory, segment + ".fnm"); int docCount = 0; SetMatchingSegmentReaders(); if (mergeDocStores) { // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); // merge field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; SegmentReader matchingSegmentReader = matchingSegmentReaders[i]; FieldsReader matchingFieldsReader; bool hasMatchingReader; if (matchingSegmentReader != null) { FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader(); if (fieldsReader != null && !fieldsReader.CanReadRawDocs()) { matchingFieldsReader = null; hasMatchingReader = false; } else { matchingFieldsReader = fieldsReader; hasMatchingReader = true; } } else { hasMatchingReader = false; matchingFieldsReader = null; } int maxDoc = reader.MaxDoc(); bool hasDeletions = reader.HasDeletions(); for (int j = 0; j < maxDoc;) { if (!hasDeletions || !reader.IsDeleted(j)) { // skip deleted docs if (hasMatchingReader) { // We can optimize this case (doing a bulk // byte copy) since the field numbers are // identical int 
start = j; int numDocs = 0; do { j++; numDocs++; if (j >= maxDoc) { break; } if (hasDeletions && matchingSegmentReader.IsDeleted(j)) { j++; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; if (checkAbort != null) { checkAbort.Work(300 * numDocs); } } else { // NOTE: it's very important to first assign // to doc then pass it to // termVectorsWriter.addAllDocVectors; see // LUCENE-1282 Document doc = reader.Document(j, fieldSelectorMerge); fieldsWriter.AddDocument(doc); j++; docCount++; if (checkAbort != null) { checkAbort.Work(300); } } } else { j++; } } } } finally { fieldsWriter.Close(); } long fdxFileLength = directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); // {{dougsale-2.4.0} // this shouldn't be a problem for us - if it is, // then it's not a JRE bug... //if (4+docCount*8 != fdxFileLength) // // This is most likely a bug in Sun JRE 1.6.0_04/_05; // // we detect that the bug has struck, here, and // // throw an exception to prevent the corruption from // // entering the index. See LUCENE-1282 for // // details. // throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + "; now aborting this merge to prevent index corruption"); } else { // If we are skipping the doc stores, that means there // are no deletions in any of these segments, so we // just sum numDocs() of each segment to get total docCount for (int i = 0; i < readers.Count; i++) { docCount += ((IndexReader)readers[i]).NumDocs(); } } return(docCount); }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int MergeFields() { if (!mergeDocStores) { // When we are not merging by doc stores, that means // all segments were written as part of a single // autoCommit=false IndexWriter session, so their field // name -> number mapping are the same. So, we start // with the fieldInfos of the last segment in this // case, to keep that numbering. SegmentReader sr = (SegmentReader) readers[readers.Count - 1]; fieldInfos = (FieldInfos) sr.fieldInfos.Clone(); } else { fieldInfos = new FieldInfos(); // merge field names } for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader) readers[i]; if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++) { FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j); fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads); } } else { AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } } fieldInfos.Write(directory, segment + ".fnm"); int docCount = 0; if (mergeDocStores) { // If the i'th reader is a SegmentReader and has // identical fieldName -> number mapping, then this // array will be non-null at position i: SegmentReader[] matchingSegmentReaders = new SegmentReader[readers.Count]; // If this reader is a SegmentReader, and all of its // field name -> number mappings match the "merged" // FieldInfos, then we can do a bulk copy of the // stored fields: for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader) readers[i]; if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; bool same = true; FieldInfos segmentFieldInfos = segmentReader.GetFieldInfos(); for (int j = 0; same && j < segmentFieldInfos.Size(); j++) same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j)); if (same) { matchingSegmentReaders[i] = segmentReader; } } } // Used for bulk-reading raw bytes for stored fields int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS]; // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); // merge field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader) readers[i]; SegmentReader matchingSegmentReader = 
matchingSegmentReaders[i]; FieldsReader matchingFieldsReader; if (matchingSegmentReader != null) matchingFieldsReader = matchingSegmentReader.GetFieldsReader(); else matchingFieldsReader = null; int maxDoc = reader.MaxDoc(); for (int j = 0; j < maxDoc; ) { if (!reader.IsDeleted(j)) { // skip deleted docs if (matchingSegmentReader != null) { // We can optimize this case (doing a bulk // byte copy) since the field numbers are // identical int start = j; int numDocs = 0; do { j++; numDocs++; } while (j < maxDoc && !matchingSegmentReader.IsDeleted(j) && numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; if (checkAbort != null) checkAbort.Work(300 * numDocs); } else { fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge)); j++; docCount++; if (checkAbort != null) checkAbort.Work(300); } } else j++; } } } finally { fieldsWriter.Close(); } System.Diagnostics.Debug.Assert(docCount*8 == directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION), "after MergeFields: fdx size mismatch: " + docCount + " docs vs " + directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION) + " length in bytes of " + segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); } // If we are skipping the doc stores, that means there // are no deletions in any of these segments, so we // just sum numDocs() of each segment to get total docCount else for (int i = 0; i < readers.Count; i++) docCount += ((IndexReader) readers[i]).NumDocs(); return docCount; }
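// Illustrative sketch, with field infos reduced to ordered name lists (the real code walks
// FieldInfos objects instead): the merge code above only enables the raw bulk copy when a
// segment's field name -> number mapping is index-for-index identical to the merged FieldInfos
// ("congruent"). The comparison on its own, with an explicit length guard that this sketch adds
// defensively rather than relying on FieldInfos lookup semantics:
internal static class FieldNumberingSketch
{
    public static bool SameNumbering(System.Collections.Generic.IList<string> mergedNames,
                                     System.Collections.Generic.IList<string> segmentNames)
    {
        if (segmentNames.Count > mergedNames.Count)
            return false; // defensive guard added in this sketch
        for (int j = 0; j < segmentNames.Count; j++)
        {
            if (!string.Equals(mergedNames[j], segmentNames[j], System.StringComparison.Ordinal))
                return false; // field number j maps to different names; bulk copy is unsafe
        }
        return true;
    }
}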
private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) { int docCount = 0; int maxDoc = reader.MaxDoc(); if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" for (int j = 0; j < maxDoc; ) { if (reader.IsDeleted(j)) { // skip deleted docs ++j; continue; } // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { j++; numDocs++; if (j >= maxDoc) break; if (reader.IsDeleted(j)) { j++; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; checkAbort.Work(300 * numDocs); } } else { for (int j = 0; j < maxDoc; j++) { if (reader.IsDeleted(j)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to doc then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Document doc = reader.Document(j, fieldSelectorMerge); fieldsWriter.AddDocument(doc); docCount++; checkAbort.Work(300); } } return docCount; }
private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) { int maxDoc = reader.MaxDoc(); int docCount = 0; if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" while (docCount < maxDoc) { int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len); fieldsWriter.AddRawDocuments(stream, rawDocLengths, len); docCount += len; checkAbort.Work(300 * len); } } else { for (; docCount < maxDoc; docCount++) { // NOTE: it's very important to first assign to doc then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Document doc = reader.Document(docCount, fieldSelectorMerge); fieldsWriter.AddDocument(doc); checkAbort.Work(300); } } return docCount; }
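// Illustrative sketch: with no deletions, CopyFieldsNoDeletions above simply walks maxDoc in
// fixed-size chunks and bulk-copies each chunk. The chunking loop on its own, assuming the
// delegate stands in for the RawDocs/AddRawDocuments pair and 4192 is only a placeholder for
// MAX_RAW_MERGE_DOCS:
internal static class BulkChunkSketch
{
    private const int MaxRawMergeDocs = 4192; // placeholder for the real constant

    // Invokes copyChunk(startDoc, length) for consecutive chunks covering [0, maxDoc)
    // and returns the total number of documents visited.
    public static int CopyInChunks(int maxDoc, System.Action<int, int> copyChunk)
    {
        int docCount = 0;
        while (docCount < maxDoc)
        {
            int len = System.Math.Min(MaxRawMergeDocs, maxDoc - docCount);
            copyChunk(docCount, len); // bulk-copy 'len' raw documents starting at 'docCount'
            docCount += len;
        }
        return docCount;
    }
}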
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int MergeFields() { if (!mergeDocStores) { // When we are not merging by doc stores, that means // all segments were written as part of a single // autoCommit=false IndexWriter session, so their field // name -> number mapping are the same. So, we start // with the fieldInfos of the last segment in this // case, to keep that numbering. SegmentReader sr = (SegmentReader) readers[readers.Count - 1]; fieldInfos = (FieldInfos) sr.fieldInfos.Clone(); } else { fieldInfos = new FieldInfos(); // merge field names } for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader) readers[i]; if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++) { FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j); fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTf); } } else { AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } } fieldInfos.Write(directory, segment + ".fnm"); int docCount = 0; SetMatchingSegmentReaders(); if (mergeDocStores) { // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); // merge field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; SegmentReader matchingSegmentReader = matchingSegmentReaders[i]; FieldsReader matchingFieldsReader; bool hasMatchingReader; if (matchingSegmentReader != null) { FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader(); if (fieldsReader != null && !fieldsReader.CanReadRawDocs()) { matchingFieldsReader = null; hasMatchingReader = false; } else { matchingFieldsReader = fieldsReader; hasMatchingReader = true; } } else { hasMatchingReader = false; matchingFieldsReader = null; } int maxDoc = reader.MaxDoc(); bool hasDeletions = reader.HasDeletions(); for (int j = 0; j < maxDoc; ) { if (!hasDeletions || !reader.IsDeleted(j)) { // skip deleted docs if (hasMatchingReader) { // We can optimize this case (doing a bulk // byte copy) since the field numbers are // identical int 
start = j; int numDocs = 0; do { j++; numDocs++; if (j >= maxDoc) break; if (hasDeletions && matchingSegmentReader.IsDeleted(j)) { j++; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; if (checkAbort != null) checkAbort.Work(300 * numDocs); } else { // NOTE: it's very important to first assign // to doc then pass it to // termVectorsWriter.addAllDocVectors; see // LUCENE-1282 Document doc = reader.Document(j, fieldSelectorMerge); fieldsWriter.AddDocument(doc); j++; docCount++; if (checkAbort != null) checkAbort.Work(300); } } else j++; } } } finally { fieldsWriter.Close(); } long fdxFileLength = directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); // {{dougsale-2.4.0} // this shouldn't be a problem for us - if it is, // then it's not a JRE bug... //if (4+docCount*8 != fdxFileLength) // // This is most likely a bug in Sun JRE 1.6.0_04/_05; // // we detect that the bug has struck, here, and // // throw an exception to prevent the corruption from // // entering the index. See LUCENE-1282 for // // details. // throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + "; now aborting this merge to prevent index corruption"); } else // If we are skipping the doc stores, that means there // are no deletions in any of these segments, so we // just sum numDocs() of each segment to get total docCount for (int i = 0; i < readers.Count; i++) docCount += ((IndexReader)readers[i]).NumDocs(); return docCount; }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int MergeFields() { if (!mergeDocStores) { // When we are not merging by doc stores, that means // all segments were written as part of a single // autoCommit=false IndexWriter session, so their field // name -> number mapping are the same. So, we start // with the fieldInfos of the last segment in this // case, to keep that numbering. SegmentReader sr = (SegmentReader) readers[readers.Count - 1]; fieldInfos = (FieldInfos) sr.core.fieldInfos.Clone(); } else { fieldInfos = new FieldInfos(); // merge field names } for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); ) { IndexReader reader = (IndexReader) iter.Current; if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; FieldInfos readerFieldInfos = segmentReader.FieldInfos(); int numReaderFieldInfos = readerFieldInfos.Size(); for (int j = 0; j < numReaderFieldInfos; j++) { FieldInfo fi = readerFieldInfos.FieldInfo(j); fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions); } } else { AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false); fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false); } } fieldInfos.Write(directory, segment + ".fnm"); int docCount = 0; SetMatchingSegmentReaders(); if (mergeDocStores) { // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); // merge field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { int idx = 0; for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); ) { IndexReader reader = (IndexReader) iter.Current; SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++]; FieldsReader matchingFieldsReader = null; if (matchingSegmentReader != null) { FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader(); if (fieldsReader != null && fieldsReader.CanReadRawDocs()) { matchingFieldsReader = fieldsReader; } } if (reader.HasDeletions()) { docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader); } else { docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader); } } } finally { fieldsWriter.Close(); } System.String fileName = segment + "." 
+ IndexFileNames.FIELDS_INDEX_EXTENSION; long fdxFileLength = directory.FileLength(fileName); if (4 + ((long) docCount) * 8 != fdxFileLength) // This is most likely a bug in Sun JRE 1.6.0_04/_05; // we detect that the bug has struck, here, and // throw an exception to prevent the corruption from // entering the index. See LUCENE-1282 for // details. throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption"); } // If we are skipping the doc stores, that means there // are no deletions in any of these segments, so we // just sum numDocs() of each segment to get total docCount else { for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); ) { docCount += ((IndexReader) iter.Current).NumDocs(); } } return docCount; }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int MergeFields() { if (!mergeDocStores) { // When we are not merging by doc stores, their field // name -> number mapping are the same. So, we start // with the fieldInfos of the last segment in this // case, to keep that numbering. SegmentReader sr = (SegmentReader)readers[readers.Count - 1]; fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone(); } else { fieldInfos = new FieldInfos(); // merge field names } foreach (IndexReader reader in readers) { if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader)reader; FieldInfos readerFieldInfos = segmentReader.FieldInfos(); int numReaderFieldInfos = readerFieldInfos.Size(); for (int j = 0; j < numReaderFieldInfos; j++) { FieldInfo fi = readerFieldInfos.FieldInfo(j); fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions); } } else { AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false); fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false); } } fieldInfos.Write(directory, segment + ".fnm"); int docCount = 0; SetMatchingSegmentReaders(); if (mergeDocStores) { // merge field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { int idx = 0; foreach (IndexReader reader in readers) { SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++]; FieldsReader matchingFieldsReader = null; if (matchingSegmentReader != null) { FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader(); if (fieldsReader != null && fieldsReader.CanReadRawDocs()) { matchingFieldsReader = fieldsReader; } } if (reader.HasDeletions) { docCount += CopyFieldsWithDeletions(fieldsWriter, reader, matchingFieldsReader); } else { docCount += CopyFieldsNoDeletions(fieldsWriter, reader, matchingFieldsReader); } } } finally { fieldsWriter.Dispose(); } System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION; long fdxFileLength = directory.FileLength(fileName); if (4 + ((long)docCount) * 8 != fdxFileLength) { // This is most likely a bug in Sun JRE 1.6.0_04/_05; // we detect that the bug has struck, here, and // throw an exception to prevent the corruption from // entering the index. See LUCENE-1282 for // details. 
throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption"); } } // If we are skipping the doc stores, that means there // are no deletions in any of these segments, so we // just sum numDocs() of each segment to get total docCount else { foreach (IndexReader reader in readers) { docCount += reader.NumDocs(); } } return(docCount); }
/// <summary>Initializes shared state for this new document </summary> internal void Init(Document doc, int docID) { System.Diagnostics.Debug.Assert(!isIdle); System.Diagnostics.Debug.Assert(Enclosing_Instance.writer.TestPoint("DocumentsWriter.ThreadState.init start")); this.docID = docID; docBoost = doc.GetBoost(); numStoredFields = 0; numFieldData = 0; numVectorFields = 0; maxTermPrefix = null; System.Diagnostics.Debug.Assert(0 == fdtLocal.Length()); System.Diagnostics.Debug.Assert(0 == fdtLocal.GetFilePointer()); System.Diagnostics.Debug.Assert(0 == tvfLocal.Length()); System.Diagnostics.Debug.Assert(0 == tvfLocal.GetFilePointer()); int thisFieldGen = fieldGen++; System.Collections.IList docFields = doc.GetFields(); int numDocFields = docFields.Count; bool docHasVectors = false; // Absorb any new fields first seen in this document. // Also absorb any changes to fields we had already // seen before (eg suddenly turning on norms or // vectors, etc.): for (int i = 0; i < numDocFields; i++) { Fieldable field = (Fieldable) docFields[i]; FieldInfo fi = Enclosing_Instance.fieldInfos.Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false); if (fi.isIndexed && !fi.omitNorms) { // Maybe grow our buffered norms if (Enclosing_Instance.norms.Length <= fi.number) { int newSize = (int) ((1 + fi.number) * 1.25); BufferedNorms[] newNorms = new BufferedNorms[newSize]; Array.Copy(Enclosing_Instance.norms, 0, newNorms, 0, Enclosing_Instance.norms.Length); Enclosing_Instance.norms = newNorms; } if (Enclosing_Instance.norms[fi.number] == null) Enclosing_Instance.norms[fi.number] = new BufferedNorms(); Enclosing_Instance.hasNorms = true; } // Make sure we have a FieldData allocated int hashPos = fi.name.GetHashCode() & fieldDataHashMask; FieldData fp = fieldDataHash[hashPos]; while (fp != null && !fp.fieldInfo.name.Equals(fi.name)) fp = fp.next; if (fp == null) { fp = new FieldData(this, fi); fp.next = fieldDataHash[hashPos]; fieldDataHash[hashPos] = fp; if (numAllFieldData == allFieldDataArray.Length) { int newSize = (int) (allFieldDataArray.Length * 1.5); int newHashSize = fieldDataHash.Length * 2; FieldData[] newArray = new FieldData[newSize]; FieldData[] newHashArray = new FieldData[newHashSize]; Array.Copy(allFieldDataArray, 0, newArray, 0, numAllFieldData); // Rehash fieldDataHashMask = newSize - 1; for (int j = 0; j < fieldDataHash.Length; j++) { FieldData fp0 = fieldDataHash[j]; while (fp0 != null) { hashPos = fp0.fieldInfo.name.GetHashCode() & fieldDataHashMask; FieldData nextFP0 = fp0.next; fp0.next = newHashArray[hashPos]; newHashArray[hashPos] = fp0; fp0 = nextFP0; } } allFieldDataArray = newArray; fieldDataHash = newHashArray; } allFieldDataArray[numAllFieldData++] = fp; } else { System.Diagnostics.Debug.Assert(fp.fieldInfo == fi); } if (thisFieldGen != fp.lastGen) { // First time we're seeing this field for this doc fp.lastGen = thisFieldGen; fp.fieldCount = 0; fp.doVectors = fp.doVectorPositions = fp.doVectorOffsets = false; fp.doNorms = fi.isIndexed && !fi.omitNorms; if (numFieldData == fieldDataArray.Length) { int newSize = fieldDataArray.Length * 2; FieldData[] newArray = new FieldData[newSize]; Array.Copy(fieldDataArray, 0, newArray, 0, numFieldData); fieldDataArray = newArray; } fieldDataArray[numFieldData++] = fp; } if (field.IsTermVectorStored()) { if (!fp.doVectors && numVectorFields++ == vectorFieldPointers.Length) { int newSize = (int) (numVectorFields * 1.5); 
vectorFieldPointers = new long[newSize]; vectorFieldNumbers = new int[newSize]; } fp.doVectors = true; docHasVectors = true; fp.doVectorPositions |= field.IsStorePositionWithTermVector(); fp.doVectorOffsets |= field.IsStoreOffsetWithTermVector(); } if (fp.fieldCount == fp.docFields.Length) { Fieldable[] newArray = new Fieldable[fp.docFields.Length * 2]; Array.Copy(fp.docFields, 0, newArray, 0, fp.docFields.Length); fp.docFields = newArray; } // Lazily allocate arrays for postings: if (field.IsIndexed() && fp.postingsHash == null) fp.InitPostingArrays(); fp.docFields[fp.fieldCount++] = field; } // Maybe init the local & global fieldsWriter if (localFieldsWriter == null) { if (Enclosing_Instance.fieldsWriter == null) { System.Diagnostics.Debug.Assert(Enclosing_Instance.docStoreSegment == null); System.Diagnostics.Debug.Assert(Enclosing_Instance.segment != null); Enclosing_Instance.docStoreSegment = Enclosing_Instance.segment; // If we hit an exception while init'ing the // fieldsWriter, we must abort this segment // because those files will be in an unknown // state: try { Enclosing_Instance.fieldsWriter = new FieldsWriter(Enclosing_Instance.directory, Enclosing_Instance.docStoreSegment, Enclosing_Instance.fieldInfos); } catch (System.Exception t) { throw new AbortException(t, Enclosing_Instance); } Enclosing_Instance.files = null; } localFieldsWriter = new FieldsWriter(null, fdtLocal, Enclosing_Instance.fieldInfos); } // First time we see a doc that has field(s) with // stored vectors, we init our tvx writer if (docHasVectors) { if (Enclosing_Instance.tvx == null) { System.Diagnostics.Debug.Assert(Enclosing_Instance.docStoreSegment != null); // If we hit an exception while init'ing the term // vector output files, we must abort this segment // because those files will be in an unknown // state: try { Enclosing_Instance.tvx = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION); Enclosing_Instance.tvx.WriteInt(TermVectorsReader.FORMAT_VERSION); Enclosing_Instance.tvd = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION); Enclosing_Instance.tvd.WriteInt(TermVectorsReader.FORMAT_VERSION); Enclosing_Instance.tvf = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION); Enclosing_Instance.tvf.WriteInt(TermVectorsReader.FORMAT_VERSION); // We must "catch up" for all docs before us // that had no vectors: for (int i = 0; i < Enclosing_Instance.numDocsInStore; i++) { Enclosing_Instance.tvx.WriteLong(Enclosing_Instance.tvd.GetFilePointer()); Enclosing_Instance.tvd.WriteVInt(0); } } catch (System.Exception t) { throw new AbortException(t, Enclosing_Instance); } Enclosing_Instance.files = null; } numVectorFields = 0; } }
/// <summary>Called if we hit an exception when adding docs, /// flushing, etc. This resets our state, discarding any /// docs added since last flush. If ae is non-null, it /// contains the root cause exception (which we re-throw /// after we are done aborting). /// </summary> internal void Abort(AbortException ae) { lock (this) { // Anywhere that throws an AbortException must first // mark aborting to make sure while the exception is // unwinding the un-synchronized stack, no thread grabs // the corrupt ThreadState that hit the aborting // exception: System.Diagnostics.Debug.Assert(ae == null || abortCount > 0); try { if (infoStream != null) infoStream.WriteLine("docWriter: now abort"); // Forcefully remove waiting ThreadStates from line for (int i = 0; i < numWaiting; i++) waitingThreadStates[i].isIdle = true; numWaiting = 0; // Wait for all other threads to finish with DocumentsWriter: PauseAllThreads(); System.Diagnostics.Debug.Assert(0 == numWaiting); try { bufferedDeleteTerms.Clear(); bufferedDeleteDocIDs.Clear(); numBufferedDeleteTerms = 0; try { abortedFiles = Files(); } catch (System.Exception) { abortedFiles = null; } docStoreSegment = null; numDocsInStore = 0; docStoreOffset = 0; files = null; // Clear vectors & fields from ThreadStates for (int i = 0; i < threadStates.Length; i++) { ThreadState state = threadStates[i]; state.tvfLocal.Reset(); state.fdtLocal.Reset(); if (state.localFieldsWriter != null) { try { state.localFieldsWriter.Close(); } catch (System.Exception) { } state.localFieldsWriter = null; } } // Reset vectors writer if (tvx != null) { try { tvx.Close(); } catch (System.Exception) { } tvx = null; } if (tvd != null) { try { tvd.Close(); } catch (System.Exception) { } tvd = null; } if (tvf != null) { try { tvf.Close(); } catch (System.Exception) { } tvf = null; } // Reset fields writer if (fieldsWriter != null) { try { fieldsWriter.Close(); } catch (System.Exception) { } fieldsWriter = null; } // Discard pending norms: int numField = fieldInfos.Size(); for (int i = 0; i < numField; i++) { FieldInfo fi = fieldInfos.FieldInfo(i); if (fi.isIndexed && !fi.omitNorms) { BufferedNorms n = norms[i]; if (n != null) try { n.Reset(); } catch (System.Exception) { } } } // Reset all postings data ResetPostingsData(); } finally { ResumeAllThreads(); } // If we have a root cause exception, re-throw it now: if (ae != null) { System.Exception t = ae.InnerException; if (t is System.IO.IOException) throw (System.IO.IOException) t; else if (t is System.SystemException) throw (System.SystemException) t; else if (t is System.ApplicationException) throw (System.ApplicationException) t; else // Should not get here System.Diagnostics.Debug.Assert(false, "unknown exception: " + t); } } finally { if (ae != null) abortCount--; System.Threading.Monitor.PulseAll(this); } } }
public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) { this.storedFieldsWriter = storedFieldsWriter; this.docState = docState; localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null, storedFieldsWriter.fieldInfos); }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int MergeFields() { if (!mergeDocStores) { // When we are not merging by doc stores, that means // all segments were written as part of a single // autoCommit=false IndexWriter session, so their field // name -> number mapping are the same. So, we start // with the fieldInfos of the last segment in this // case, to keep that numbering. SegmentReader sr = (SegmentReader)readers[readers.Count - 1]; fieldInfos = (FieldInfos)sr.fieldInfos.Clone(); } else { fieldInfos = new FieldInfos(); // merge field names } for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader)reader; for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++) { FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j); fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads); } } else { AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } } fieldInfos.Write(directory, segment + ".fnm"); int docCount = 0; if (mergeDocStores) { // If the i'th reader is a SegmentReader and has // identical fieldName -> number mapping, then this // array will be non-null at position i: SegmentReader[] matchingSegmentReaders = new SegmentReader[readers.Count]; // If this reader is a SegmentReader, and all of its // field name -> number mappings match the "merged" // FieldInfos, then we can do a bulk copy of the // stored fields: for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; if (reader is SegmentReader) { SegmentReader segmentReader = (SegmentReader)reader; bool same = true; FieldInfos segmentFieldInfos = segmentReader.GetFieldInfos(); for (int j = 0; same && j < segmentFieldInfos.Size(); j++) { same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j)); } if (same) { matchingSegmentReaders[i] = segmentReader; } } } // Used for bulk-reading raw bytes for stored fields int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS]; // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); // merge field values FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; SegmentReader matchingSegmentReader = 
matchingSegmentReaders[i]; FieldsReader matchingFieldsReader; if (matchingSegmentReader != null) { matchingFieldsReader = matchingSegmentReader.GetFieldsReader(); } else { matchingFieldsReader = null; } int maxDoc = reader.MaxDoc(); for (int j = 0; j < maxDoc;) { if (!reader.IsDeleted(j)) { // skip deleted docs if (matchingSegmentReader != null) { // We can optimize this case (doing a bulk // byte copy) since the field numbers are // identical int start = j; int numDocs = 0; do { j++; numDocs++; }while (j < maxDoc && !matchingSegmentReader.IsDeleted(j) && numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; if (checkAbort != null) { checkAbort.Work(300 * numDocs); } } else { fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge)); j++; docCount++; if (checkAbort != null) { checkAbort.Work(300); } } } else { j++; } } } } finally { fieldsWriter.Close(); } } // If we are skipping the doc stores, that means there // are no deletions in any of these segments, so we // just sum numDocs() of each segment to get total docCount else { for (int i = 0; i < readers.Count; i++) { docCount += ((IndexReader)readers[i]).NumDocs(); } } return(docCount); }
public StoredFieldsWriterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, StoredFieldsWriter storedFieldsWriter) { this.storedFieldsWriter = storedFieldsWriter; this.docState = docFieldProcessorPerThread.docState; localFieldsWriter = new FieldsWriter((IndexOutput)null, (IndexOutput)null, storedFieldsWriter.fieldInfos); }
/// <summary>Clear the postings hash and return objects back to /// shared pool /// </summary> public void ResetPostings() { fieldGen = 0; maxPostingsVectors = 0; doFlushAfter = false; if (localFieldsWriter != null) { localFieldsWriter.Close(); localFieldsWriter = null; } postingsPool.Reset(); charPool.Reset(); Enclosing_Instance.RecyclePostings(postingsFreeList, postingsFreeCount); postingsFreeCount = 0; for (int i = 0; i < numAllFieldData; i++) { FieldData fp = allFieldDataArray[i]; fp.lastGen = - 1; if (fp.numPostings > 0) fp.ResetPostingArrays(); } }
internal override void closeDocStore(DocumentsWriter.FlushState state) { lock (this) { int inc = state.numDocsInStore - lastDocID; if (inc > 0) { initFieldsWriter(); fill(state.numDocsInStore - docWriter.GetDocStoreOffset()); } if (fieldsWriter != null) { fieldsWriter.Close(); fieldsWriter = null; lastDocID = 0; System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null); string fdtFile = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION; string fdxFile = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION; state.flushedFiles[fdtFile] = fdtFile; state.flushedFiles[fdxFile] = fdxFile; state.docWriter.RemoveOpenFile(fdtFile); state.docWriter.RemoveOpenFile(fdxFile); if (4 + state.numDocsInStore * 8 != state.directory.FileLength(fdxFile)) throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fdxFile) + " length in bytes of " + fdxFile); } } }
/// <summary>Closes the currently open doc stores and returns the doc /// store segment name. This returns null if there are /// no buffered documents. /// </summary> internal System.String CloseDocStore() { System.Diagnostics.Debug.Assert(AllThreadsIdle()); System.Collections.IList flushedFiles = Files(); if (infoStream != null) infoStream.WriteLine("\ncloseDocStore: " + flushedFiles.Count + " files to flush to segment " + docStoreSegment + " numDocs=" + numDocsInStore); if (flushedFiles.Count > 0) { files = null; if (tvx != null) { // At least one doc in this run had term vectors enabled System.Diagnostics.Debug.Assert(docStoreSegment != null); tvx.Close(); tvf.Close(); tvd.Close(); tvx = null; System.Diagnostics.Debug.Assert(4 + numDocsInStore * 8 == directory.FileLength(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION), "after flush: tvx size mismatch: " + numDocsInStore + " docs vs " + directory.FileLength(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION) + " length in bytes of " + docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION); } if (fieldsWriter != null) { System.Diagnostics.Debug.Assert(docStoreSegment != null); fieldsWriter.Close(); fieldsWriter = null; System.Diagnostics.Debug.Assert(numDocsInStore * 8 == directory.FileLength(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION), "after flush: fdx size mismatch: " + numDocsInStore + " docs vs " + directory.FileLength(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION) + " length in bytes of " + docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); } System.String s = docStoreSegment; docStoreSegment = null; docStoreOffset = 0; numDocsInStore = 0; return s; } else { return null; } }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> IOException </throws> private int MergeFields() { fieldInfos = new FieldInfos(); // merge field names int docCount = 0; for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader) readers[i]; AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } fieldInfos.Write(directory, segment + ".fnm"); FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader) readers[i]; int maxDoc = reader.MaxDoc(); for (int j = 0; j < maxDoc; j++) if (!reader.IsDeleted(j)) { // skip deleted docs fieldsWriter.AddDocument(reader.Document(j)); docCount++; } } } finally { fieldsWriter.Close(); } return docCount; }
public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) { this.storedFieldsWriter = storedFieldsWriter; this.docState = docState; localFieldsWriter = new FieldsWriter((IndexOutput)null, (IndexOutput)null, storedFieldsWriter.fieldInfos); }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> IOException </throws> private int MergeFields() { fieldInfos = new FieldInfos(); // merge field names int docCount = 0; for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader) readers[i]; AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } fieldInfos.Write(directory, segment + ".fnm"); FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader) readers[i]; int maxDoc = reader.MaxDoc(); for (int j = 0; j < maxDoc; j++) if (!reader.IsDeleted(j)) { // skip deleted docs fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge)); docCount++; } } } finally { fieldsWriter.Close(); } return docCount; }