internal override void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        // tvx == null means no doc in this run had term vectors
        // enabled, so there is nothing to flush or close.
        if (tvx == null)
            return;

        // Pad the index out to the full doc-store doc count, then close
        // all three term vector outputs.
        Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
        tvx.Close();
        tvf.Close();
        tvd.Close();
        tvx = null;

        System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
        System.String idxName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
        System.String fldName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION;
        System.String docName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION;

        // Sanity check: the index file must be a 4-byte header plus 16
        // bytes per stored doc.
        if (4 + ((long) state.numDocsInStore) * 16 != state.directory.FileLength(idxName))
        {
            throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.FileExists(idxName));
        }

        SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, idxName);
        SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fldName);
        SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, docName);

        docWriter.RemoveOpenFile(idxName);
        docWriter.RemoveOpenFile(fldName);
        docWriter.RemoveOpenFile(docName);

        lastDocID = 0;
    }
}
/// <summary>Wraps already-open output streams. doClose is set to false,
/// which makes Close() a no-op, so the caller keeps ownership of both
/// streams.</summary>
internal FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn)
{
    doClose = false;
    fieldInfos = fn;
    indexStream = fdx;
    fieldsStream = fdt;
}
/// <summary> Save this segment's info.</summary>
internal void Write(IndexOutput output)
{
    // Field order here must exactly match the corresponding read path.
    output.WriteString(name);
    output.WriteInt(docCount);
    output.WriteLong(delGen);
    output.WriteInt(docStoreOffset);
    // Doc-store details are only written when an offset is present
    // (docStoreOffset == -1 means no shared doc store).
    if (docStoreOffset != -1)
    {
        output.WriteString(docStoreSegment);
        output.WriteByte((byte)(docStoreIsCompoundFile?1:0));
    }
    output.WriteByte((byte)(hasSingleNormFile?1:0));
    if (normGen == null)
    {
        // Sentinel written when no per-field norm generations exist.
        output.WriteInt(NO);
    }
    else
    {
        output.WriteInt(normGen.Length);
        for (int j = 0; j < normGen.Length; j++)
        {
            output.WriteLong(normGen[j]);
        }
    }
    output.WriteByte((byte)isCompoundFile);
    output.WriteInt(delCount);
    output.WriteByte((byte)(hasProx?1:0));
    output.WriteStringStringMap(diagnostics);
}
/// <summary>Lazily creates the three term vector output files (index,
/// documents, fields) for the current doc store segment, writes the
/// format header to each, and registers them as open files. A no-op when
/// the files already exist or no doc store segment is available yet.
/// Fix: removed a dead Debug.Assert(docStoreSegment != null) that
/// immediately followed the null-return guard and could never fire.</summary>
internal void InitTermVectorsWriter()
{
    lock (this)
    {
        if (tvx == null)
        {
            System.String docStoreSegment = docWriter.GetDocStoreSegment();

            // No doc store segment yet; nothing to initialize.
            if (docStoreSegment == null)
            {
                return;
            }

            // If we hit an exception while init'ing the term
            // vector output files, we must abort this segment
            // because those files will be in an unknown
            // state:
            tvx = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            tvd = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
            tvf = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);

            tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
            tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
            tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);

            docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            lastDocID = 0;
        }
    }
}
/// <summary>Merges the norms of every indexed, norms-bearing field from
/// all readers into a single norms file. The output file is created
/// lazily on the first such field and always closed before returning.</summary>
private void MergeNorms()
{
    byte[] normBuffer = null;
    IndexOutput output = null;
    try
    {
        int numFieldInfos = fieldInfos.Size();
        for (int i = 0; i < numFieldInfos; i++)
        {
            FieldInfo fi = fieldInfos.FieldInfo(i);
            if (!fi.isIndexed || fi.omitNorms)
                continue;

            if (output == null)
            {
                // First field that carries norms: create the file and
                // write its header.
                output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
                output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
            }

            foreach (IndexReader reader in readers)
            {
                int maxDoc = reader.MaxDoc();
                if (normBuffer == null || normBuffer.Length < maxDoc)
                {
                    // the buffer is too small for the current segment
                    normBuffer = new byte[maxDoc];
                }
                reader.Norms(fi.name, normBuffer, 0);
                if (!reader.HasDeletions())
                {
                    // optimized case for segments without deleted docs
                    output.WriteBytes(normBuffer, maxDoc);
                }
                else
                {
                    // this segment has deleted docs, so we have to
                    // check for every doc if it is deleted or not
                    for (int k = 0; k < maxDoc; k++)
                    {
                        if (!reader.IsDeleted(k))
                            output.WriteByte(normBuffer[k]);
                    }
                }
                checkAbort.Work(maxDoc);
            }
        }
    }
    finally
    {
        if (output != null)
            output.Close();
    }
}
/// <summary>Closes both streams (when this writer owns them), ensuring
/// each is closed and nulled even if the other throws, while propagating
/// only the first IOException hit.
/// Fix: rethrow with "throw;" instead of "throw ioe;" so the original
/// stack trace is preserved.</summary>
internal void Close()
{
    if (doClose)
    {
        try
        {
            if (fieldsStream != null)
            {
                try
                {
                    fieldsStream.Close();
                }
                finally
                {
                    fieldsStream = null;
                }
            }
        }
        catch (System.IO.IOException)
        {
            // fieldsStream failed to close: still try to close
            // indexStream before rethrowing the original exception.
            try
            {
                if (indexStream != null)
                {
                    try
                    {
                        indexStream.Close();
                    }
                    finally
                    {
                        indexStream = null;
                    }
                }
            }
            catch (System.IO.IOException)
            {
                // Ignore so we throw only first IOException hit
            }
            throw;
        }
        finally
        {
            // Normal path: close indexStream here. After a fieldsStream
            // failure the catch block above already nulled it, so this
            // is then a no-op.
            if (indexStream != null)
            {
                try
                {
                    indexStream.Close();
                }
                finally
                {
                    indexStream = null;
                }
            }
        }
    }
}
/// <summary>Creates a skip list writer over the given freq/prox outputs,
/// allocating one tracking slot per skip level for the last recorded
/// doc, payload length, and file pointers.</summary>
internal DefaultSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput) : base(skipInterval, numberOfSkipLevels, docCount)
{
    lastSkipDoc = new int[numberOfSkipLevels];
    lastSkipPayloadLength = new int[numberOfSkipLevels];
    lastSkipFreqPointer = new long[numberOfSkipLevels];
    lastSkipProxPointer = new long[numberOfSkipLevels];

    this.freqOutput = freqOutput;
    this.proxOutput = proxOutput;
}
/// <summary>Creates a skip list writer over the given freq/prox outputs,
/// allocating one tracking slot per skip level for the last recorded
/// doc, payload length, and file pointers.</summary>
internal DefaultSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput) : base(skipInterval, numberOfSkipLevels, docCount)
{
    lastSkipDoc = new int[numberOfSkipLevels];
    lastSkipPayloadLength = new int[numberOfSkipLevels];
    lastSkipFreqPointer = new long[numberOfSkipLevels];
    lastSkipProxPointer = new long[numberOfSkipLevels];

    this.freqOutput = freqOutput;
    this.proxOutput = proxOutput;
}
protected internal override void WriteSkipData(int level, IndexOutput skipBuffer)
{
    // To efficiently store payloads in the posting lists we do not store
    // the length of every payload; the length is omitted when it equals
    // the previous payload's length. To support skipping, the payload
    // length at every skip point must still be known, so the same
    // encoding is reused here:
    //
    // Case 1: current field does not store payloads
    //   SkipDatum --> DocSkip, FreqSkip, ProxSkip  (all VInt)
    //   DocSkip is the delta from the previous skip point's doc number.
    //
    // Case 2: current field stores payloads
    //   SkipDatum --> DocSkip, PayloadLength?, FreqSkip, ProxSkip
    //   DocSkip/2 is the doc delta. If DocSkip is odd, a PayloadLength
    //   VInt follows; if even, the payload length is unchanged from the
    //   previous skip point.
    if (!curStorePayloads)
    {
        // No payloads: plain delta-encoded doc skip.
        skipBuffer.WriteVInt(curDoc - lastSkipDoc[level]);
    }
    else
    {
        int docDelta = curDoc - lastSkipDoc[level];
        if (curPayloadLength == lastSkipPayloadLength[level])
        {
            // Same payload length as the previous skip point: low bit
            // clear, no length written.
            skipBuffer.WriteVInt(docDelta << 1);
        }
        else
        {
            // Length changed: set the low bit and append the new
            // payload length.
            skipBuffer.WriteVInt((docDelta << 1) | 1);
            skipBuffer.WriteVInt(curPayloadLength);
            lastSkipPayloadLength[level] = curPayloadLength;
        }
    }

    skipBuffer.WriteVInt((int)(curFreqPointer - lastSkipFreqPointer[level]));
    skipBuffer.WriteVInt((int)(curProxPointer - lastSkipProxPointer[level]));

    lastSkipDoc[level] = curDoc;
    lastSkipFreqPointer[level] = curFreqPointer;
    lastSkipProxPointer[level] = curProxPointer;
}
/// <summary>Opens the three term vector files (index, documents, fields)
/// for <c>segment</c>, writing the current format header to each as it
/// is created.</summary>
public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
{
    // Open files for TermVector storage
    tvx = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
    tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
    tvd = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
    tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
    tvf = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
    tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);
    this.fieldInfos = fieldInfos;
}
/// <summary>Opens the three term vector files (index, documents, fields)
/// for <c>segment</c>, writing the current format header to each as it
/// is created.</summary>
public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
{
    // Open files for TermVector storage
    tvx = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
    tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
    tvd = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
    tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
    tvf = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
    tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);
    this.fieldInfos = fieldInfos;
}
/// <summary>Serializes this object to a newly created file <c>name</c>
/// in directory <c>d</c> via Write(IndexOutput); the output is always
/// closed, even when writing throws.</summary>
public void Write(Directory d, System.String name)
{
    IndexOutput os = d.CreateOutput(name);
    try
    {
        Write(os);
    }
    finally
    {
        os.Close();
    }
}
/// <summary>Creates the terms output file for <c>segment</c> (".tii"
/// when <c>isi</c> is true, else ".tis") and writes its fixed header.</summary>
private void Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
{
    indexInterval = interval;
    fieldInfos = fis;
    isIndex = isi;
    output = directory.CreateOutput(segment + (isIndex?".tii":".tis"));
    output.WriteInt(FORMAT_CURRENT); // write format
    output.WriteLong(0); // leave space for size
    output.WriteInt(indexInterval); // write indexInterval
    output.WriteInt(skipInterval); // write skipInterval
    output.WriteInt(maxSkipLevels); // write maxSkipLevels
    // Debug-only initialization: wrapping it in an assert compiles it
    // out of release builds.
    System.Diagnostics.Debug.Assert(InitUTF16Results());
}
/// <summary>Copy the contents of the file with specified extension into the
/// provided output stream. Use the provided buffer for moving data
/// to reduce memory allocation. Verifies that the number of bytes
/// written matches the source file length, and always closes the input.
/// </summary>
private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
{
    IndexInput input = null;
    try
    {
        long startPtr = os.GetFilePointer();

        input = directory.OpenInput(source.file);
        long length = input.Length();
        long remainder = length;
        int chunkSize = buffer.Length;

        while (remainder > 0)
        {
            int len = (int) System.Math.Min(chunkSize, remainder);
            input.ReadBytes(buffer, 0, len, false);
            os.WriteBytes(buffer, len);
            remainder -= len;
            if (checkAbort != null)
            {
                // Roughly every 2 MB we will check if
                // it's time to abort
                checkAbort.Work(80);
            }
        }

        // Verify that remainder is 0
        if (remainder != 0)
        {
            throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunkSize + ")");
        }

        // Verify that the output length diff is equal to original file
        long endPtr = os.GetFilePointer();
        long diff = endPtr - startPtr;
        if (diff != length)
        {
            throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
        }
    }
    finally
    {
        if (input != null)
        {
            input.Close();
        }
    }
}
/// <summary>Creates the frequency postings output for the parent's
/// segment, registers it in the flushed-files set, wires it into the
/// shared skip list writer, and creates the positions writer.</summary>
internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent) : base()
{
    this.parent = parent;
    System.String fileName = IndexFileNames.SegmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION);
    // Record the file as flushed before creating it.
    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fileName);
    out_Renamed = parent.parent.dir.CreateOutput(fileName);
    totalNumDocs = parent.parent.totalNumDocs;

    // TODO: abstraction violation
    skipInterval = parent.parent.termsOut.skipInterval;
    skipListWriter = parent.parent.skipListWriter;
    skipListWriter.SetFreqOutput(out_Renamed);

    posWriter = new FormatPostingsPositionsWriter(state, this);
}
/// <summary>Creates the frequency postings output for the parent's
/// segment, registers it in the flushed-files set, wires it into the
/// shared skip list writer, and creates the positions writer.</summary>
internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent):base()
{
    this.parent = parent;
    System.String fileName = IndexFileNames.SegmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION);
    // Record the file as flushed before creating it.
    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fileName);
    out_Renamed = parent.parent.dir.CreateOutput(fileName);
    totalNumDocs = parent.parent.totalNumDocs;

    // TODO: abstraction violation
    skipInterval = parent.parent.termsOut.skipInterval;
    skipListWriter = parent.parent.skipListWriter;
    skipListWriter.SetFreqOutput(out_Renamed);

    posWriter = new FormatPostingsPositionsWriter(state, this);
}
/// <summary>Creates the positions (prox) output for the segment when at
/// least one field stores term frequencies/positions; otherwise no prox
/// file is created and the output stays null.</summary>
internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent)
{
    this.parent = parent;
    omitTermFreqAndPositions = parent.omitTermFreqAndPositions;

    if (!parent.parent.parent.fieldInfos.HasProx())
    {
        // Every field omits TF so we will write no prox file
        out_Renamed = null;
        return;
    }

    // At least one field does not omit TF, so create the prox file
    System.String fileName = IndexFileNames.SegmentFileName(parent.parent.parent.segment, IndexFileNames.PROX_EXTENSION);
    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fileName);
    out_Renamed = parent.parent.parent.dir.CreateOutput(fileName);
    parent.skipListWriter.SetProxOutput(out_Renamed);
}
/// <summary>Creates the positions (prox) output for the segment when at
/// least one field stores term frequencies/positions; otherwise no prox
/// file is created and the output stays null.</summary>
internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent)
{
    this.parent = parent;
    omitTermFreqAndPositions = parent.omitTermFreqAndPositions;

    if (!parent.parent.parent.fieldInfos.HasProx())
    {
        // Every field omits TF so we will write no prox file
        out_Renamed = null;
        return;
    }

    // At least one field does not omit TF, so create the prox file
    System.String fileName = IndexFileNames.SegmentFileName(parent.parent.parent.segment, IndexFileNames.PROX_EXTENSION);
    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fileName);
    out_Renamed = parent.parent.parent.dir.CreateOutput(fileName);
    parent.skipListWriter.SetProxOutput(out_Renamed);
}
/// <summary>Writes this vector to the file <code>name</code> in Directory
/// <code>d</code>, in a format that can be read by the constructor {@link
/// #BitVector(Directory, String)}. The output is always closed, even if
/// writing throws.
/// </summary>
public void Write(Directory d, System.String name)
{
    IndexOutput os = d.CreateOutput(name);
    try
    {
        if (!IsSparse())
        {
            WriteBits(os);
        }
        else
        {
            // sparse bit-set more efficiently saved as d-gaps.
            WriteDgaps(os);
        }
    }
    finally
    {
        os.Close();
    }
}
/// <summary>Write as a d-gaps list: a -1 marker, size, count, then for
/// each non-zero byte the gap from the previous non-zero byte followed
/// by the byte itself. Stops early once every set bit is accounted for.</summary>
private void WriteDgaps(IndexOutput output)
{
    output.WriteInt(-1); // mark using d-gaps
    output.WriteInt(Size()); // write size
    output.WriteInt(Count()); // write count
    int lastWritten = 0;
    int remaining = Count();
    int numBytes = bits.Length;
    for (int i = 0; i < numBytes && remaining > 0; i++)
    {
        if (bits[i] != 0)
        {
            output.WriteVInt(i - lastWritten);
            output.WriteByte(bits[i]);
            lastWritten = i;
            remaining -= BYTE_COUNTS[bits[i] & 0xFF];
        }
    }
}
/// <summary>Serializes all field infos: format, field count, then for
/// each field its name and a flags byte packing the per-field boolean
/// options.</summary>
public void Write(IndexOutput output)
{
    output.WriteVInt(CURRENT_FORMAT);
    output.WriteVInt(Size());
    for (int i = 0; i < Size(); i++)
    {
        FieldInfo fi = FieldInfo(i);
        // Pack the field's boolean options into a single flags byte.
        byte flags = (byte)(0x0);
        if (fi.isIndexed)
            flags |= IS_INDEXED;
        if (fi.storeTermVector)
            flags |= STORE_TERMVECTOR;
        if (fi.storePositionWithTermVector)
            flags |= STORE_POSITIONS_WITH_TERMVECTOR;
        if (fi.storeOffsetWithTermVector)
            flags |= STORE_OFFSET_WITH_TERMVECTOR;
        if (fi.omitNorms)
            flags |= OMIT_NORMS;
        if (fi.storePayloads)
            flags |= STORE_PAYLOADS;
        if (fi.omitTermFreqAndPositions)
            flags |= OMIT_TERM_FREQ_AND_POSITIONS;

        output.WriteString(fi.name);
        output.WriteByte(flags);
    }
}
/// <summary> Writes the buffered skip lists to the given output.
///
/// </summary>
/// <param name="output">the IndexOutput the skip lists shall be written to
/// </param>
/// <returns> the pointer the skip list starts
/// </returns>
internal virtual long WriteSkip(IndexOutput output)
{
    long skipPointer = output.GetFilePointer();
    if (skipBuffer != null && skipBuffer.Length > 0)
    {
        // Higher levels first, each prefixed with its byte length so a
        // reader can locate every level; level 0 is last and unprefixed.
        for (int level = numberOfSkipLevels - 1; level > 0; level--)
        {
            long length = skipBuffer[level].GetFilePointer();
            if (length > 0)
            {
                output.WriteVLong(length);
                skipBuffer[level].WriteTo(output);
            }
        }
        skipBuffer[0].WriteTo(output);
    }
    return skipPointer;
}
/// <summary>Streams all remaining bytes of this slice chain to
/// <c>out_Renamed</c>, advancing through slices until the final one
/// (the slice containing <c>endIndex</c>) is reached; returns the total
/// number of bytes written.</summary>
public long WriteTo(IndexOutput out_Renamed)
{
    long size = 0;
    while (true)
    {
        if (limit + bufferOffset == endIndex)
        {
            // Final slice: write its remaining bytes and stop.
            System.Diagnostics.Debug.Assert(endIndex - bufferOffset >= upto);
            out_Renamed.WriteBytes(buffer, upto, limit - upto);
            size += limit - upto;
            break;
        }
        else
        {
            // Interior slice: flush it fully, then advance to the next.
            out_Renamed.WriteBytes(buffer, upto, limit - upto);
            size += limit - upto;
            NextSlice();
        }
    }
    return (size);
}
/// <summary>Best-effort cleanup: closes any open term vector outputs,
/// deliberately swallowing close failures (we are already aborting),
/// nulls the references, and resets the last-doc state.</summary>
public override void Abort()
{
    if (tvx != null)
    {
        try
        {
            tvx.Close();
        }
        catch (System.Exception)
        {
            // ignore: abort is best-effort
        }
        tvx = null;
    }
    if (tvd != null)
    {
        try
        {
            tvd.Close();
        }
        catch (System.Exception)
        {
            // ignore: abort is best-effort
        }
        tvd = null;
    }
    if (tvf != null)
    {
        try
        {
            tvf.Close();
        }
        catch (System.Exception)
        {
            // ignore: abort is best-effort
        }
        tvf = null;
    }
    lastDocID = 0;
}
/// <summary> Writes the buffered skip lists to the given output.
///
/// </summary>
/// <param name="output">the IndexOutput the skip lists shall be written to
/// </param>
/// <returns> the pointer the skip list starts
/// </returns>
internal virtual long WriteSkip(IndexOutput output)
{
    long skipPointer = output.GetFilePointer();
    if (skipBuffer != null && skipBuffer.Length > 0)
    {
        // Higher levels first, each prefixed with its byte length so a
        // reader can locate every level; level 0 is last and unprefixed.
        for (int level = numberOfSkipLevels - 1; level > 0; level--)
        {
            long length = skipBuffer[level].GetFilePointer();
            if (length > 0)
            {
                output.WriteVLong(length);
                skipBuffer[level].WriteTo(output);
            }
        }
        skipBuffer[0].WriteTo(output);
    }
    return skipPointer;
}
/// <summary>Replaces the prox output stream this writer records file
/// pointers against.</summary>
internal virtual void SetProxOutput(IndexOutput proxOutput)
{
    this.proxOutput = proxOutput;
}
/// <summary> Save this segment's info.</summary>
internal void Write(IndexOutput output)
{
    // Field order here must exactly match the corresponding read path.
    output.WriteString(name);
    output.WriteInt(docCount);
    output.WriteLong(delGen);
    output.WriteInt(docStoreOffset);
    // Doc-store details are only written when an offset is present
    // (docStoreOffset == -1 means no shared doc store).
    if (docStoreOffset != - 1)
    {
        output.WriteString(docStoreSegment);
        output.WriteByte((byte) (docStoreIsCompoundFile?1:0));
    }
    output.WriteByte((byte) (hasSingleNormFile?1:0));
    if (normGen == null)
    {
        // Sentinel written when no per-field norm generations exist.
        output.WriteInt(NO);
    }
    else
    {
        output.WriteInt(normGen.Length);
        for (int j = 0; j < normGen.Length; j++)
        {
            output.WriteLong(normGen[j]);
        }
    }
    output.WriteByte((byte) isCompoundFile);
    output.WriteInt(delCount);
    output.WriteByte((byte) (hasProx?1:0));
    output.WriteStringStringMap(diagnostics);
}
/// <summary>Best-effort cleanup: closes any open term vector outputs,
/// deliberately swallowing close failures (we are already aborting),
/// nulls the references, and resets the last-doc state.</summary>
public override void Abort()
{
    if (tvx != null)
    {
        try
        {
            tvx.Close();
        }
        catch (System.Exception)
        {
            // ignore: abort is best-effort
        }
        tvx = null;
    }
    if (tvd != null)
    {
        try
        {
            tvd.Close();
        }
        catch (System.Exception)
        {
            // ignore: abort is best-effort
        }
        tvd = null;
    }
    if (tvf != null)
    {
        try
        {
            tvf.Close();
        }
        catch (System.Exception)
        {
            // ignore: abort is best-effort
        }
        tvf = null;
    }
    lastDocID = 0;
}
/// <summary>Lazily creates the three term vector output files (index,
/// documents, fields) for the current doc store segment, writes the
/// format header to each, and registers them as open files. A no-op when
/// the files already exist or no doc store segment is available yet.
/// Fix: removed a dead Debug.Assert(docStoreSegment != null) that
/// immediately followed the null-return guard and could never fire.</summary>
internal void InitTermVectorsWriter()
{
    lock (this)
    {
        if (tvx == null)
        {
            System.String docStoreSegment = docWriter.GetDocStoreSegment();

            // No doc store segment yet; nothing to initialize.
            if (docStoreSegment == null)
            {
                return;
            }

            // If we hit an exception while init'ing the term
            // vector output files, we must abort this segment
            // because those files will be in an unknown
            // state:
            tvx = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            tvd = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
            tvf = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);

            tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
            tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
            tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);

            docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            lastDocID = 0;
        }
    }
}
/// <summary>Replaces the prox output stream this writer records file
/// pointers against.</summary>
internal virtual void SetProxOutput(IndexOutput proxOutput)
{
    this.proxOutput = proxOutput;
}
/// <summary>Completes a two-phase commit started by prepareCommit:
/// finishes and closes the pending segments output (rolling back on
/// failure), syncs the new segments_N file to stable storage (deleting
/// it if the sync fails), then best-effort records the generation in the
/// segments.gen fallback file.</summary>
internal void FinishCommit(Directory dir)
{
    if (pendingSegnOutput == null)
    {
        throw new System.SystemException("prepareCommit was not called");
    }
    bool success = false;
    try
    {
        pendingSegnOutput.FinishCommit();
        pendingSegnOutput.Close();
        pendingSegnOutput = null;
        success = true;
    }
    finally
    {
        if (!success)
        {
            RollbackCommit(dir);
        }
    }

    // NOTE: if we crash here, we have left a segments_N
    // file in the directory in a possibly corrupt state (if
    // some bytes made it to stable storage and others
    // didn't). But, the segments_N file includes checksum
    // at the end, which should catch this case. So when a
    // reader tries to read it, it will throw a
    // CorruptIndexException, which should cause the retry
    // logic in SegmentInfos to kick in and load the last
    // good (previous) segments_N-1 file.

    System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
    success = false;
    try
    {
        // Force the new segments_N to stable storage; on failure the
        // partially synced file is removed.
        dir.Sync(fileName);
        success = true;
    }
    finally
    {
        if (!success)
        {
            try
            {
                dir.DeleteFile(fileName);
            }
            catch (System.Exception t)
            {
                // Suppress so we keep throwing the original exception
            }
        }
    }

    lastGeneration = generation;

    try
    {
        IndexOutput genOutput = dir.CreateOutput(IndexFileNames.SEGMENTS_GEN);
        try
        {
            genOutput.WriteInt(FORMAT_LOCKLESS);
            // Generation is written twice — presumably so readers can
            // detect a torn write; confirm against the read path.
            genOutput.WriteLong(generation);
            genOutput.WriteLong(generation);
        }
        finally
        {
            genOutput.Close();
        }
    }
    catch (System.Exception t)
    {
        // It's OK if we fail to write this file since it's
        // used only as one of the retry fallbacks.
    }
}
/// <summary>Write as a d-gaps list: a -1 marker, size, count, then for
/// each non-zero byte the gap from the previous non-zero byte followed
/// by the byte itself. Stops early once every set bit is accounted for.</summary>
private void WriteDgaps(IndexOutput output)
{
    output.WriteInt(-1); // mark using d-gaps
    output.WriteInt(Size()); // write size
    output.WriteInt(Count()); // write count
    int lastWritten = 0;
    int remaining = Count();
    int numBytes = bits.Length;
    for (int i = 0; i < numBytes && remaining > 0; i++)
    {
        if (bits[i] != 0)
        {
            output.WriteVInt(i - lastWritten);
            output.WriteByte(bits[i]);
            lastWritten = i;
            remaining -= BYTE_COUNTS[bits[i] & 0xFF];
        }
    }
}
/// <summary>Write as a bit set </summary>
private void WriteBits(IndexOutput output)
{
    output.WriteInt(Size()); // write size
    output.WriteInt(Count()); // write count
    // raw bit data, one byte per 8 bits
    output.WriteBytes(bits, bits.Length);
}
/// <summary>Creates the fields data and fields index files for
/// <c>segment</c> in <c>d</c>, writing the current format header to
/// each. If either creation fails, any files created so far are closed
/// and deleted and the original exception is rethrown.</summary>
internal FieldsWriter(Directory d, System.String segment, FieldInfos fn)
{
    fieldInfos = fn;

    bool success = false;
    System.String fieldsName = segment + "." + IndexFileNames.FIELDS_EXTENSION;
    try
    {
        fieldsStream = d.CreateOutput(fieldsName);
        fieldsStream.WriteInt(FORMAT_CURRENT);
        success = true;
    }
    finally
    {
        if (!success)
        {
            try
            {
                Close();
            }
            catch (System.Exception t)
            {
                // Suppress so we keep throwing the original exception
            }
            try
            {
                d.DeleteFile(fieldsName);
            }
            catch (System.Exception t)
            {
                // Suppress so we keep throwing the original exception
            }
        }
    }

    success = false;
    System.String indexName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
    try
    {
        indexStream = d.CreateOutput(indexName);
        indexStream.WriteInt(FORMAT_CURRENT);
        success = true;
    }
    finally
    {
        if (!success)
        {
            // Index file failed: clean up both files created so far.
            try
            {
                Close();
            }
            catch (System.IO.IOException ioe)
            {
            }
            try
            {
                d.DeleteFile(fieldsName);
            }
            catch (System.Exception t)
            {
                // Suppress so we keep throwing the original exception
            }
            try
            {
                d.DeleteFile(indexName);
            }
            catch (System.Exception t)
            {
                // Suppress so we keep throwing the original exception
            }
        }
    }

    // This writer owns the streams; Close() will close them.
    doClose = true;
}
/// <summary>Replaces the freq output stream this writer records file
/// pointers against.</summary>
internal virtual void SetFreqOutput(IndexOutput freqOutput)
{
    this.freqOutput = freqOutput;
}
/// <summary>Closes both streams (when this writer owns them), ensuring
/// each is closed and nulled even if the other throws, while propagating
/// only the first IOException hit.
/// Fix: rethrow with "throw;" instead of "throw ioe;" so the original
/// stack trace is preserved.</summary>
internal void Close()
{
    if (doClose)
    {
        try
        {
            if (fieldsStream != null)
            {
                try
                {
                    fieldsStream.Close();
                }
                finally
                {
                    fieldsStream = null;
                }
            }
        }
        catch (System.IO.IOException)
        {
            // fieldsStream failed to close: still try to close
            // indexStream before rethrowing the original exception.
            try
            {
                if (indexStream != null)
                {
                    try
                    {
                        indexStream.Close();
                    }
                    finally
                    {
                        indexStream = null;
                    }
                }
            }
            catch (System.IO.IOException)
            {
                // Ignore so we throw only first IOException hit
            }
            throw;
        }
        finally
        {
            // Normal path: close indexStream here. After a fieldsStream
            // failure the catch block above already nulled it, so this
            // is then a no-op.
            if (indexStream != null)
            {
                try
                {
                    indexStream.Close();
                }
                finally
                {
                    indexStream = null;
                }
            }
        }
    }
}
/// <summary>Replaces the stream that subsequent field data is written
/// to. The caller retains responsibility for the previous stream.</summary>
internal void SetFieldsStream(IndexOutput stream)
{
    this.fieldsStream = stream;
}
internal override void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        // tvx == null means no doc in this run had term vectors
        // enabled, so there is nothing to flush or close.
        if (tvx == null)
            return;

        // Pad the index out to the full doc-store doc count, then close
        // all three term vector outputs.
        Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
        tvx.Close();
        tvf.Close();
        tvd.Close();
        tvx = null;

        System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
        System.String idxName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
        System.String fldName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION;
        System.String docName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION;

        // Sanity check: the index file must be a 4-byte header plus 16
        // bytes per stored doc.
        if (4 + ((long) state.numDocsInStore) * 16 != state.directory.FileLength(idxName))
        {
            throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.FileExists(idxName));
        }

        SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, idxName);
        SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fldName);
        SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, docName);

        docWriter.RemoveOpenFile(idxName);
        docWriter.RemoveOpenFile(fldName);
        docWriter.RemoveOpenFile(docName);

        lastDocID = 0;
    }
}
/// <summary> Subclasses must implement the actual skip data encoding in this method.
///
/// </summary>
/// <param name="level">the level skip data shall be written for
/// </param>
/// <param name="skipBuffer">the skip buffer to write to
/// </param>
protected internal abstract void WriteSkipData(int level, IndexOutput skipBuffer);
/// <summary>Write as a bit set </summary>
private void WriteBits(IndexOutput output)
{
    output.WriteInt(Size()); // write size
    output.WriteInt(Count()); // write count
    // raw bit data, one byte per 8 bits
    output.WriteBytes(bits, bits.Length);
}
/// <summary>Replaces the freq output stream this writer records file
/// pointers against.</summary>
internal virtual void SetFreqOutput(IndexOutput freqOutput)
{
    this.freqOutput = freqOutput;
}
/// <summary>Called once per field per document if term vectors
/// are enabled, to write the vectors to
/// RAMOutputStream, which is then quickly flushed to
/// the real term vectors files in the Directory.
/// </summary>
internal override void Finish()
{
    System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.finish start"));

    int numPostings = termsHashPerField.numPostings;

    System.Diagnostics.Debug.Assert(numPostings >= 0);

    // Nothing to do when vectors are disabled or the field produced no terms.
    if (!doVectors || numPostings == 0)
    {
        return;
    }

    if (numPostings > maxNumPostings)
    {
        maxNumPostings = numPostings;
    }

    IndexOutput tvf = perThread.doc.perDocTvf;

    // This is called once, after inverting all occurrences
    // of a given field in the doc. At this point we flush
    // our hash into the DocWriter.

    System.Diagnostics.Debug.Assert(fieldInfo.storeTermVector);
    System.Diagnostics.Debug.Assert(perThread.VectorFieldsInOrder(fieldInfo));

    perThread.doc.AddField(termsHashPerField.fieldInfo.number);

    RawPostingList[] postings = termsHashPerField.SortPostings();

    tvf.WriteVInt(numPostings);
    // Flags byte records whether positions and/or offsets follow each term.
    byte bits = (byte)(0x0);
    if (doVectorPositions)
    {
        bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
    }
    if (doVectorOffsets)
    {
        bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
    }
    tvf.WriteByte(bits);

    int encoderUpto = 0;
    int lastTermBytesCount = 0;

    ByteSliceReader reader = perThread.vectorSliceReader;
    char[][] charBuffers = perThread.termsHashPerThread.charPool.buffers;
    for (int j = 0; j < numPostings; j++)
    {
        TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList)postings[j];
        int freq = posting.freq;

        // Locate the term's UTF-16 text in the shared char pool.
        char[] text2 = charBuffers[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
        int start2 = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

        // We swap between two encoders to save copying
        // last Term's byte array
        UnicodeUtil.UTF8Result utf8Result = perThread.utf8Results[encoderUpto];

        // TODO: we could do this incrementally
        UnicodeUtil.UTF16toUTF8(text2, start2, utf8Result);
        int termBytesCount = utf8Result.length;

        // TODO: UTF16toUTF8 could tell us this prefix
        // Compute common prefix between last term and
        // this term
        int prefix = 0;
        if (j > 0)
        {
            byte[] lastTermBytes = perThread.utf8Results[1 - encoderUpto].result;
            byte[] termBytes = perThread.utf8Results[encoderUpto].result;
            while (prefix < lastTermBytesCount && prefix < termBytesCount)
            {
                if (lastTermBytes[prefix] != termBytes[prefix])
                {
                    break;
                }
                prefix++;
            }
        }
        encoderUpto = 1 - encoderUpto;
        lastTermBytesCount = termBytesCount;

        // Terms are written prefix-compressed: shared-prefix length,
        // suffix length, then the suffix bytes, then the frequency.
        int suffix = termBytesCount - prefix;
        tvf.WriteVInt(prefix);
        tvf.WriteVInt(suffix);
        tvf.WriteBytes(utf8Result.result, prefix, suffix);
        tvf.WriteVInt(freq);

        if (doVectorPositions)
        {
            termsHashPerField.InitReader(reader, posting, 0);
            reader.WriteTo(tvf);
        }

        if (doVectorOffsets)
        {
            termsHashPerField.InitReader(reader, posting, 1);
            reader.WriteTo(tvf);
        }
    }

    termsHashPerField.Reset();

    // NOTE: we clear, per-field, at the thread level,
    // because term vectors fully write themselves on each
    // field; this saves RAM (eg if large doc has two large
    // fields w/ term vectors on) because we recycle/reuse
    // all RAM after each field:
    perThread.termsHashPerThread.Reset(false);
}
/// <summary>Merge files with the extensions added up to now.
/// All files with these extensions are combined sequentially into the
/// compound stream. After successful merge, the source files
/// are deleted.
/// </summary>
/// <throws> IllegalStateException if close() had been called before or </throws>
/// <summary> if no file has been added to this object
/// </summary>
public void Close()
{
    if (merged)
    {
        throw new System.SystemException("Merge already performed");
    }

    if ((entries.Count == 0))
    {
        throw new System.SystemException("No entries to merge have been defined");
    }

    merged = true;

    // open the compound stream
    IndexOutput os = null;
    try
    {
        os = directory.CreateOutput(fileName);

        // Write the number of entries
        os.WriteVInt(entries.Count);

        // Write the directory with all offsets at 0.
        // Remember the positions of directory entries so that we can
        // adjust the offsets later
        System.Collections.IEnumerator it = entries.GetEnumerator();
        long totalSize = 0;
        while (it.MoveNext())
        {
            FileEntry fe = (FileEntry)it.Current;
            fe.directoryOffset = os.GetFilePointer();
            os.WriteLong(0); // for now
            os.WriteString(fe.file);
            totalSize += directory.FileLength(fe.file);
        }

        // Pre-allocate size of file as optimization --
        // this can potentially help IO performance as
        // we write the file and also later during
        // searching. It also uncovers a disk-full
        // situation earlier and hopefully without
        // actually filling disk to 100%:
        long finalLength = totalSize + os.GetFilePointer();
        os.SetLength(finalLength);

        // Open the files and copy their data into the stream.
        // Remember the locations of each file's data section.
        byte[] buffer = new byte[16384];
        it = entries.GetEnumerator();
        while (it.MoveNext())
        {
            FileEntry fe = (FileEntry)it.Current;
            fe.dataOffset = os.GetFilePointer();
            CopyFile(fe, os, buffer);
        }

        // Write the data offsets into the directory of the compound stream
        it = entries.GetEnumerator();
        while (it.MoveNext())
        {
            FileEntry fe = (FileEntry)it.Current;
            os.Seek(fe.directoryOffset);
            os.WriteLong(fe.dataOffset);
        }

        System.Diagnostics.Debug.Assert(finalLength == os.Length());

        // Close the output stream. Set the os to null before trying to
        // close so that if an exception occurs during the close, the
        // finally clause below will not attempt to close the stream
        // the second time.
        IndexOutput tmp = os;
        os = null;
        tmp.Close();
    }
    finally
    {
        // Only reached with os != null when an earlier step threw;
        // close best-effort so the original exception propagates.
        if (os != null)
        {
            try
            {
                os.Close();
            }
            catch (System.IO.IOException e)
            {
            }
        }
    }
}
/// <summary> Subclasses must implement the actual skip data encoding in this method.
///
/// </summary>
/// <param name="level">the level skip data shall be written for
/// </param>
/// <param name="skipBuffer">the skip buffer to write to
/// </param>
protected internal abstract void WriteSkipData(int level, IndexOutput skipBuffer);
/// <summary>Replaces the stream that subsequent field data is written
/// to. The caller retains responsibility for the previous stream.</summary>
internal void SetFieldsStream(IndexOutput stream)
{
    this.fieldsStream = stream;
}
/// <summary>Shared setup: records the interval/field parameters, opens the
/// term dictionary (.tis) or term index (.tii) output file depending on
/// <c>isi</c>, and writes the file header.
/// </summary>
private void Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
{
    indexInterval = interval;
    fieldInfos = fis;
    isIndex = isi;

    System.String extension;
    if (isIndex)
        extension = ".tii";
    else
        extension = ".tis";
    output = directory.CreateOutput(segment + extension);

    // Header: format version, a placeholder long for the size (filled in
    // later), then the interval/skip parameters readers need.
    output.WriteInt(FORMAT_CURRENT);   // write format
    output.WriteLong(0);               // leave space for size
    output.WriteInt(indexInterval);    // write indexInterval
    output.WriteInt(skipInterval);     // write skipInterval
    output.WriteInt(maxSkipLevels);    // write maxSkipLevels
    System.Diagnostics.Debug.Assert(InitUTF16Results());
}
/// <summary>Creates the stored-fields data (.fdt) and index (.fdx) files for
/// <c>segment</c> in directory <c>d</c> and writes the format header into
/// each. If creation of either file fails, the partially created files are
/// deleted (best-effort) before the original exception propagates.
/// </summary>
internal FieldsWriter(Directory d, System.String segment, FieldInfos fn)
{
    fieldInfos = fn;

    bool success = false;
    System.String fieldsName = segment + "." + IndexFileNames.FIELDS_EXTENSION;
    try
    {
        fieldsStream = d.CreateOutput(fieldsName);
        fieldsStream.WriteInt(FORMAT_CURRENT);
        success = true;
    }
    finally
    {
        if (!success)
        {
            try
            {
                Close();
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception
            }
            try
            {
                d.DeleteFile(fieldsName);
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception
            }
        }
    }

    success = false;
    System.String indexName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
    try
    {
        indexStream = d.CreateOutput(indexName);
        indexStream.WriteInt(FORMAT_CURRENT);
        success = true;
    }
    finally
    {
        if (!success)
        {
            // On failure of the second file, roll back both files.
            try
            {
                Close();
            }
            catch (System.IO.IOException)
            {
                // Suppress so we keep throwing the original exception
            }
            try
            {
                d.DeleteFile(fieldsName);
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception
            }
            try
            {
                d.DeleteFile(indexName);
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception
            }
        }
    }

    // Both files were created by this instance, so Close() should close them.
    doClose = true;
}
/// <summary> Copy the contents of the file with specified extension into the
/// provided output stream. Use the provided buffer for moving data
/// to reduce memory allocation.
/// </summary>
private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
{
    IndexInput input = null;
    try
    {
        long startPtr = os.GetFilePointer();

        input = directory.OpenInput(source.file);
        long length = input.Length();
        long remainder = length;
        int chunk = buffer.Length;

        // Stream the source file across in buffer-sized chunks.
        while (remainder > 0)
        {
            int len = (int) System.Math.Min(chunk, remainder);
            input.ReadBytes(buffer, 0, len, false);
            os.WriteBytes(buffer, len);
            remainder -= len;
            // Give the merge-abort checker a periodic chance to bail out.
            if (checkAbort != null)
                checkAbort.Work(80);
        }

        // Verify that remainder is 0
        if (remainder != 0)
            throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");

        // Verify that the output length diff is equal to original file
        long endPtr = os.GetFilePointer();
        long diff = endPtr - startPtr;
        if (diff != length)
            throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
    }
    finally
    {
        if (input != null)
            input.Close();
    }
}
/// <summary>Produce _X.nrm if any document had a field with norms
/// not disabled
/// </summary>
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    // Maps FieldInfo -> IList of NormsWriterPerField instances that
    // buffered norms for that field during this flush.
    System.Collections.IDictionary byField = new System.Collections.Hashtable();

    // Typically, each thread will have encountered the same
    // field.  So first we collate by field, ie, all
    // per-thread field instances that correspond to the
    // same FieldInfo
    // NOTE(review): enumeration runs over a copy of threadsAndFields,
    // presumably to avoid concurrent-modification issues -- confirm.
    System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
    while (it.MoveNext())
    {
        System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current;

        System.Collections.ICollection fields = (System.Collections.ICollection) entry.Value;
        System.Collections.IEnumerator fieldsIt = fields.GetEnumerator();
        // Collected first, removed after the enumeration, so the inner
        // collection is not modified while being enumerated.
        System.Collections.ArrayList fieldsToRemove = new System.Collections.ArrayList();

        while (fieldsIt.MoveNext())
        {
            NormsWriterPerField perField = (NormsWriterPerField) ((System.Collections.DictionaryEntry) fieldsIt.Current).Key;

            if (perField.upto > 0)
            {
                // It has some norms
                System.Collections.IList l = (System.Collections.IList) byField[perField.fieldInfo];
                if (l == null)
                {
                    l = new System.Collections.ArrayList();
                    byField[perField.fieldInfo] = l;
                }
                l.Add(perField);
            }
            // Remove this field since we haven't seen it
            // since the previous flush
            else
            {
                fieldsToRemove.Add(perField);
            }
        }

        System.Collections.Hashtable fieldsHT = (System.Collections.Hashtable) fields;
        for (int i = 0; i < fieldsToRemove.Count; i++)
        {
            fieldsHT.Remove(fieldsToRemove[i]);
        }
    }

    System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
    state.flushedFiles[normsFileName] = normsFileName;
    IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

    try
    {
        normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

        int numField = fieldInfos.Size();

        // Number of fields that actually received a norms section; used by
        // the size assertion below.
        int normCount = 0;

        // Emit one densely-filled norms section per field, in field-number
        // order. Holes (docs without a buffered norm) get defaultNorm.
        for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
        {
            FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

            System.Collections.IList toMerge = (System.Collections.IList) byField[fieldInfo];
            int upto = 0;
            if (toMerge != null)
            {
                int numFields = toMerge.Count;

                normCount++;

                NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                int[] uptos = new int[numFields];

                for (int j = 0; j < numFields; j++)
                {
                    fields[j] = (NormsWriterPerField) toMerge[j];
                }

                int numLeft = numFields;

                // Merge the per-thread buffers in ascending docID order.
                while (numLeft > 0)
                {
                    System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                    // Find the buffer whose next entry has the smallest docID.
                    int minLoc = 0;
                    int minDocID = fields[0].docIDs[uptos[0]];

                    for (int j = 1; j < numLeft; j++)
                    {
                        int docID = fields[j].docIDs[uptos[j]];
                        if (docID < minDocID)
                        {
                            minDocID = docID;
                            minLoc = j;
                        }
                    }

                    System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                    // Fill hole
                    for (; upto < minDocID; upto++)
                    {
                        normsOut.WriteByte(defaultNorm);
                    }

                    normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                    (uptos[minLoc])++;
                    upto++;

                    // Buffer at minLoc is exhausted: reset it and compact the
                    // live set by swapping in the last live buffer.
                    if (uptos[minLoc] == fields[minLoc].upto)
                    {
                        fields[minLoc].Reset();
                        if (minLoc != numLeft - 1)
                        {
                            fields[minLoc] = fields[numLeft - 1];
                            uptos[minLoc] = uptos[numLeft - 1];
                        }
                        numLeft--;
                    }
                }

                // Fill final hole with defaultNorm
                for (; upto < state.numDocs; upto++)
                {
                    normsOut.WriteByte(defaultNorm);
                }
            }
            else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
            {
                normCount++;
                // Fill entire field with default norm:
                for (; upto < state.numDocs; upto++)
                {
                    normsOut.WriteByte(defaultNorm);
                }
            }

            // 4 header bytes plus one norm byte per doc per written section.
            System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer());
        }
    }
    finally
    {
        normsOut.Close();
    }
}
protected internal override void WriteSkipData(int level, IndexOutput skipBuffer)
{
    // To efficiently store payloads in the posting lists we do not store the length of
    // every payload. Instead we omit the length for a payload if the previous payload had
    // the same length.
    // However, in order to support skipping the payload length at every skip point must be known.
    // So we use the same length encoding that we use for the posting lists for the skip data as well:
    // Case 1: current field does not store payloads
    //           SkipDatum                 --> DocSkip, FreqSkip, ProxSkip
    //           DocSkip,FreqSkip,ProxSkip --> VInt
    //           DocSkip records the document number before every SkipInterval th document in TermFreqs.
    //           Document numbers are represented as differences from the previous value in the sequence.
    // Case 2: current field stores payloads
    //           SkipDatum                 --> DocSkip, PayloadLength?, FreqSkip, ProxSkip
    //           DocSkip,FreqSkip,ProxSkip --> VInt
    //           PayloadLength             --> VInt
    //         In this case DocSkip/2 is the difference between
    //         the current and the previous value. If DocSkip
    //         is odd, then a PayloadLength encoded as VInt follows,
    //         if DocSkip is even, then it is assumed that the
    //         current payload length equals the length at the previous
    //         skip point
    if (curStorePayloads)
    {
        int delta = curDoc - lastSkipDoc[level];
        if (curPayloadLength == lastSkipPayloadLength[level])
        {
            // the current payload length equals the length at the previous skip point,
            // so we don't store the length again
            skipBuffer.WriteVInt(delta * 2);
        }
        else
        {
            // the payload length is different from the previous one. We shift the DocSkip,
            // set the lowest bit and store the current payload length as VInt.
            skipBuffer.WriteVInt(delta * 2 + 1);
            skipBuffer.WriteVInt(curPayloadLength);
            lastSkipPayloadLength[level] = curPayloadLength;
        }
    }
    else
    {
        // current field does not store payloads
        skipBuffer.WriteVInt(curDoc - lastSkipDoc[level]);
    }

    // Freq/prox file pointers are delta-encoded against the previous skip
    // point at this level.
    skipBuffer.WriteVInt((int) (curFreqPointer - lastSkipFreqPointer[level]));
    skipBuffer.WriteVInt((int) (curProxPointer - lastSkipProxPointer[level]));

    // Remember the values just written so the next skip point at this level
    // can be delta-encoded against them.
    lastSkipDoc[level] = curDoc;
    lastSkipFreqPointer[level] = curFreqPointer;
    lastSkipProxPointer[level] = curProxPointer;
}
/// <summary>Serializes this field-info collection to <c>output</c>: the
/// current format version, the field count, then one (name, flag byte)
/// pair per field.
/// </summary>
public void Write(IndexOutput output)
{
    output.WriteVInt(CURRENT_FORMAT);
    output.WriteVInt(Size());
    for (int fieldNumber = 0; fieldNumber < Size(); fieldNumber++)
    {
        FieldInfo fi = FieldInfo(fieldNumber);

        // Pack the per-field boolean options into a single flag byte.
        byte bits = 0;
        if (fi.isIndexed)
            bits |= IS_INDEXED;
        if (fi.storeTermVector)
            bits |= STORE_TERMVECTOR;
        if (fi.storePositionWithTermVector)
            bits |= STORE_POSITIONS_WITH_TERMVECTOR;
        if (fi.storeOffsetWithTermVector)
            bits |= STORE_OFFSET_WITH_TERMVECTOR;
        if (fi.omitNorms)
            bits |= OMIT_NORMS;
        if (fi.storePayloads)
            bits |= STORE_PAYLOADS;
        if (fi.omitTermFreqAndPositions)
            bits |= OMIT_TERM_FREQ_AND_POSITIONS;

        output.WriteString(fi.name);
        output.WriteByte(bits);
    }
}
/// <summary>Wraps already-open stored-fields streams supplied by the caller.
/// doClose is left false, signalling that this instance did not create the
/// streams (presumably so the caller retains responsibility for closing
/// them -- confirm against Close()).
/// </summary>
internal FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn)
{
    doClose = false;
    indexStream = fdx;
    fieldsStream = fdt;
    fieldInfos = fn;
}