/// <summary>
/// Creates a skip list writer bound to the given frequency and proximity outputs.
/// </summary>
/// <param name="skipInterval">forwarded unchanged to the base constructor</param>
/// <param name="numberOfSkipLevels">forwarded to the base constructor; also the size of every per-level tracking array</param>
/// <param name="docCount">forwarded unchanged to the base constructor</param>
/// <param name="freqOutput">output whose pointer deltas are recorded as FreqSkip</param>
/// <param name="proxOutput">output whose pointer deltas are recorded as ProxSkip</param>
internal DefaultSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput)
    : base(skipInterval, numberOfSkipLevels, docCount)
{
    // One "value at the previous skip point" slot per skip level.
    lastSkipDoc = new int[numberOfSkipLevels];
    lastSkipPayloadLength = new int[numberOfSkipLevels];
    lastSkipFreqPointer = new long[numberOfSkipLevels];
    lastSkipProxPointer = new long[numberOfSkipLevels];

    this.freqOutput = freqOutput;
    this.proxOutput = proxOutput;
}
/// <summary>
/// Writes one skip entry for <paramref name="level"/> into <paramref name="skipBuffer"/>
/// and remembers the current doc / pointer values as the new "previous" values of that level.
/// </summary>
/// <param name="level">skip level the entry belongs to</param>
/// <param name="skipBuffer">buffer receiving the encoded entry</param>
protected internal override void WriteSkipData(int level, IndexOutput skipBuffer)
{
    // Payload lengths are not stored for every payload in the posting lists: a length is
    // omitted when it equals the previous one. Skipping, however, needs the payload length
    // at every skip point, so the skip data uses the same "only when it changed" encoding:
    //
    // Case 1: current field does not store payloads
    //           SkipDatum                 --> DocSkip, FreqSkip, ProxSkip
    //           DocSkip,FreqSkip,ProxSkip --> VInt
    //         DocSkip is the difference from the previous document number at this level.
    //
    // Case 2: current field stores payloads
    //           SkipDatum                 --> DocSkip, PayloadLength?, FreqSkip, ProxSkip
    //           DocSkip,FreqSkip,ProxSkip --> VInt
    //           PayloadLength             --> VInt
    //         DocSkip/2 is the document delta. An odd DocSkip is followed by a
    //         PayloadLength VInt; an even DocSkip means the payload length is the same
    //         as at the previous skip point.
    int docDelta = curDoc - lastSkipDoc[level];

    if (!curStorePayloads)
    {
        // Case 1: plain document delta.
        skipBuffer.WriteVInt(docDelta);
    }
    else if (curPayloadLength == lastSkipPayloadLength[level])
    {
        // Case 2, unchanged length: shifted delta with the low bit clear, no length written.
        skipBuffer.WriteVInt(docDelta * 2);
    }
    else
    {
        // Case 2, changed length: shifted delta with the low bit set, then the new length.
        skipBuffer.WriteVInt(docDelta * 2 + 1);
        skipBuffer.WriteVInt(curPayloadLength);
        lastSkipPayloadLength[level] = curPayloadLength;
    }

    // File-pointer deltas are narrowed to int before being written as VInts.
    int freqDelta = (int) (curFreqPointer - lastSkipFreqPointer[level]);
    int proxDelta = (int) (curProxPointer - lastSkipProxPointer[level]);
    skipBuffer.WriteVInt(freqDelta);
    skipBuffer.WriteVInt(proxDelta);

    // The values just written become the reference for the next entry of this level.
    lastSkipDoc[level] = curDoc;
    lastSkipFreqPointer[level] = curFreqPointer;
    lastSkipProxPointer[level] = curProxPointer;
}
/// <summary>
/// Opens the three term vector outputs (index, documents, fields) for
/// <paramref name="segment"/> and writes the current format marker to each.
/// </summary>
/// <param name="directory">directory in which the outputs are created</param>
/// <param name="segment">segment name used as the file name prefix</param>
/// <param name="fieldInfos">field infos stored on the writer</param>
/// <remarks>
/// If creating or initialising any output fails, the outputs that were already
/// opened are closed before the original exception propagates, so a failed
/// construction does not leak open handles.
/// </remarks>
public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
{
    bool success = false;
    try
    {
        // Open files for TermVector storage
        tvx = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
        tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
        tvd = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
        tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
        tvf = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
        tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);
        success = true;
    }
    finally
    {
        if (!success)
        {
            // Best-effort cleanup: close whatever was opened and suppress secondary
            // failures so the original exception is the one the caller sees.
            if (tvx != null)
            {
                try
                {
                    tvx.Close();
                }
                catch (System.Exception)
                {
                    // Suppress so we keep throwing the original exception
                }
            }
            if (tvd != null)
            {
                try
                {
                    tvd.Close();
                }
                catch (System.Exception)
                {
                    // Suppress so we keep throwing the original exception
                }
            }
            if (tvf != null)
            {
                try
                {
                    tvf.Close();
                }
                catch (System.Exception)
                {
                    // Suppress so we keep throwing the original exception
                }
            }
        }
    }

    this.fieldInfos = fieldInfos;
}
/// <summary>
/// Creates the docs writer: opens the segment's frequency file, registers it as a
/// flushed file, points the shared skip list writer at it and creates the
/// positions writer.
/// </summary>
/// <param name="state">write state whose flushed-file set receives the new file name</param>
/// <param name="parent">owning terms writer</param>
internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent)
    : base()
{
    this.parent = parent;

    // Everything below is read from the terms writer's own parent.
    var owner = parent.parent;

    System.String freqFileName = IndexFileNames.SegmentFileName(owner.segment, IndexFileNames.FREQ_EXTENSION);
    state.flushedFiles.Add(freqFileName);
    out_Renamed = owner.dir.CreateOutput(freqFileName);
    totalNumDocs = owner.totalNumDocs;

    // TODO: abstraction violation
    skipInterval = owner.termsOut.skipInterval;
    skipListWriter = owner.skipListWriter;
    skipListWriter.SetFreqOutput(out_Renamed);

    // Must come last: the positions writer reads this.skipListWriter in its constructor.
    posWriter = new FormatPostingsPositionsWriter(state, this);
}
/// <summary>
/// Creates the positions writer. The prox file is only created when the field
/// infos report that prox data exists; otherwise the output stays null.
/// </summary>
/// <param name="state">write state whose flushed-file set receives the prox file name</param>
/// <param name="parent">owning docs writer</param>
internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent)
{
    this.parent = parent;
    omitTermFreqAndPositions = parent.omitTermFreqAndPositions;

    var owner = parent.parent.parent;

    if (!owner.fieldInfos.HasProx())
    {
        // Every field omits TF so we will write no prox file
        out_Renamed = null;
        return;
    }

    // At least one field does not omit TF, so create the prox file
    System.String proxFileName = IndexFileNames.SegmentFileName(owner.segment, IndexFileNames.PROX_EXTENSION);
    state.flushedFiles.Add(proxFileName);
    out_Renamed = owner.dir.CreateOutput(proxFileName);
    parent.skipListWriter.SetProxOutput(out_Renamed);
}
/// <summary>
/// Copies the current contents of this buffer to the given output, one internal
/// buffer at a time. Pending data is flushed first.
/// </summary>
/// <param name="out_Renamed">destination output</param>
public virtual void WriteTo(IndexOutput out_Renamed)
{
    Flush();

    long end = file.length;
    int bufferIndex = 0;

    for (long pos = 0; pos < end; pos += BUFFER_SIZE)
    {
        // Every buffer is written in full except the last one, which may be partial.
        long remaining = end - pos;
        int length = remaining < BUFFER_SIZE ? (int) remaining : BUFFER_SIZE;

        out_Renamed.WriteBytes(file.GetBuffer(bufferIndex++), length);
    }
}
/// <summary>
/// Writes the unread bytes of the current slice and of every following slice to
/// <paramref name="out"/>.
/// </summary>
/// <param name="out">destination output</param>
/// <returns>the total number of bytes written</returns>
public long WriteTo(IndexOutput @out)
{
    long written = 0;
    bool lastSlice;

    do
    {
        // The final slice is the one whose limit lands exactly on endIndex.
        lastSlice = (limit + bufferOffset == endIndex);
        if (lastSlice)
        {
            System.Diagnostics.Debug.Assert(endIndex - bufferOffset >= upto);
        }

        int count = limit - upto;
        @out.WriteBytes(buffer, upto, count);
        written += count;

        if (!lastSlice)
        {
            NextSlice();
        }
    }
    while (!lastSlice);

    return written;
}
/// <summary>Replaces the frequency output used by this writer.</summary>
/// <param name="freqOutput">the new frequency output</param>
internal virtual void SetFreqOutput(IndexOutput freqOutput)
{
    this.freqOutput = freqOutput;
}
/// <summary>
/// Writes the buffered skip lists to the given output. Levels above zero are
/// written from highest to lowest, each non-empty one preceded by its length as
/// a VLong; level zero is written last without a length prefix.
/// </summary>
/// <param name="output">the IndexOutput the skip lists shall be written to</param>
/// <returns>the pointer at which the skip list starts</returns>
internal virtual long WriteSkip(IndexOutput output)
{
    long skipPointer = output.FilePointer;

    bool nothingBuffered = skipBuffer == null || skipBuffer.Length == 0;
    if (nothingBuffered)
    {
        return skipPointer;
    }

    int level = numberOfSkipLevels - 1;
    while (level > 0)
    {
        var levelBuffer = skipBuffer[level];
        long length = levelBuffer.FilePointer;
        if (length > 0)
        {
            output.WriteVLong(length);
            levelBuffer.WriteTo(output);
        }
        level--;
    }

    skipBuffer[0].WriteTo(output);
    return skipPointer;
}
/// <summary> Subclasses must implement the actual skip data encoding in this method.
/// </summary>
/// <param name="level">the level the skip data shall be written for
/// </param>
/// <param name="skipBuffer">the skip buffer to write to
/// </param>
protected internal abstract void WriteSkipData(int level, IndexOutput skipBuffer);
/// <summary>
/// Stores the writer settings, creates the output file (".tii" for an index,
/// ".tis" otherwise) and writes its header.
/// </summary>
/// <param name="directory">directory in which the output is created</param>
/// <param name="segment">segment name used as the file name prefix</param>
/// <param name="fis">field infos stored on the writer</param>
/// <param name="interval">index interval, stored and written to the header</param>
/// <param name="isi">true when this writer produces the ".tii" file</param>
private void Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
{
    indexInterval = interval;
    fieldInfos = fis;
    isIndex = isi;

    System.String extension;
    if (isIndex)
    {
        extension = ".tii";
    }
    else
    {
        extension = ".tis";
    }
    output = directory.CreateOutput(segment + extension);

    // Header layout, in order:
    output.WriteInt(FORMAT_CURRENT); // format
    output.WriteLong(0);             // placeholder: leave space for size
    output.WriteInt(indexInterval);  // indexInterval
    output.WriteInt(skipInterval);   // skipInterval
    output.WriteInt(maxSkipLevels);  // maxSkipLevels

    // Debug.Assert is conditional on DEBUG, so InitUTF16Results() only runs in
    // builds where assertions are compiled in.
    System.Diagnostics.Debug.Assert(InitUTF16Results());
}
/// <summary>
/// Closes the term vector outputs of the current doc store (if any were opened),
/// verifies the size of the index file and records the three files as flushed.
/// </summary>
/// <param name="state">write state supplying the doc store segment name, document
/// count, directory and flushed-file set</param>
/// <exception cref="System.SystemException">the index file length does not match
/// the expected size for <c>state.numDocsInStore</c> documents</exception>
internal override void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        if (tvx != null)
        {
            // At least one doc in this run had term vectors enabled
            Fill(state.numDocsInStore - docWriter.DocStoreOffset);

            tvx.Close();
            tvf.Close();
            tvd.Close();

            // Drop all three references, not only tvx: Abort() null-checks each
            // stream separately and would otherwise close tvd/tvf a second time.
            tvx = null;
            tvf = null;
            tvd = null;

            System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);

            System.String indexFileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
            System.String fieldsFileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION;
            System.String documentsFileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION;

            // Expected index file size: 4 bytes plus 16 bytes per stored document.
            if (4 + ((long) state.numDocsInStore) * 16 != state.directory.FileLength(indexFileName))
            {
                throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(indexFileName) + " length in bytes of " + indexFileName + " file exists?=" + state.directory.FileExists(indexFileName));
            }

            state.flushedFiles.Add(indexFileName);
            state.flushedFiles.Add(fieldsFileName);
            state.flushedFiles.Add(documentsFileName);

            docWriter.RemoveOpenFile(indexFileName);
            docWriter.RemoveOpenFile(fieldsFileName);
            docWriter.RemoveOpenFile(documentsFileName);

            lastDocID = 0;
        }
    }
}
/// <summary>
/// Copies the whole file described by <paramref name="source"/> into
/// <paramref name="os"/>, moving the data through the caller-supplied buffer to
/// avoid allocating one per call.
/// </summary>
/// <param name="source">entry naming the file to copy</param>
/// <param name="os">destination output</param>
/// <param name="buffer">scratch buffer; its length is the copy chunk size</param>
/// <exception cref="System.IO.IOException">the copy ended with a non-zero remainder,
/// or the output advanced by a different number of bytes than the source length</exception>
private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
{
    IndexInput input = null;
    try
    {
        long startPtr = os.FilePointer;

        input = directory.OpenInput(source.file);
        long length = input.Length();
        long remainder = length;
        int chunk = buffer.Length;

        while (remainder > 0)
        {
            // Full chunk, or whatever is left for the final read.
            int len = chunk < remainder ? chunk : (int) remainder;
            input.ReadBytes(buffer, 0, len, false);
            os.WriteBytes(buffer, len);
            remainder -= len;

            if (checkAbort != null)
            {
                // Roughly every 2 MB we will check if
                // it's time to abort
                checkAbort.Work(80);
            }
        }

        // Verify that remainder is 0
        if (remainder != 0)
        {
            throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
        }

        // Verify that the output length diff is equal to original file
        long endPtr = os.FilePointer;
        long diff = endPtr - startPtr;
        if (diff != length)
        {
            throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
        }
    }
    finally
    {
        if (input != null)
        {
            input.Close();
        }
    }
}
/// <summary>
/// Lazily opens the three term vector outputs for the current doc store segment,
/// writes the format marker to each and registers the files as open. Does nothing
/// when the outputs already exist or no doc store segment is available.
/// </summary>
internal void InitTermVectorsWriter()
{
    lock (this)
    {
        if (tvx != null)
        {
            return;
        }

        System.String docStoreSegment = docWriter.DocStoreSegment;
        if (docStoreSegment == null)
        {
            return;
        }

        System.Diagnostics.Debug.Assert(docStoreSegment != null);

        System.String indexFileName = docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
        System.String documentsFileName = docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION;
        System.String fieldsFileName = docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION;

        // If we hit an exception while init'ing the term
        // vector output files, we must abort this segment
        // because those files will be in an unknown
        // state:
        tvx = docWriter.directory.CreateOutput(indexFileName);
        tvd = docWriter.directory.CreateOutput(documentsFileName);
        tvf = docWriter.directory.CreateOutput(fieldsFileName);

        tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
        tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
        tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);

        docWriter.AddOpenFile(indexFileName);
        docWriter.AddOpenFile(fieldsFileName);
        docWriter.AddOpenFile(documentsFileName);

        lastDocID = 0;
    }
}
/// <summary>
/// Writes the vector as a d-gaps list: a -1 marker, the size and the count,
/// followed by one (index gap as VInt, byte value) pair per non-zero byte.
/// Stops early once all set bits have been accounted for.
/// </summary>
/// <param name="output">destination output</param>
private void WriteDgaps(IndexOutput output)
{
    output.WriteInt(-1);       // mark using d-gaps
    output.WriteInt(Size());   // write size
    output.WriteInt(Count());  // write count

    int previousIndex = 0;
    int bitsRemaining = Count();
    int byteCount = bits.Length;

    int index = 0;
    while (index < byteCount && bitsRemaining > 0)
    {
        var current = bits[index];
        if (current != 0)
        {
            // Gap from the previously written byte, then the byte itself.
            output.WriteVInt(index - previousIndex);
            output.WriteByte(current);
            previousIndex = index;
            bitsRemaining -= BYTE_COUNTS[current & 0xFF];
        }
        index++;
    }
}
/// <summary>
/// Closes the fields stream and the index stream when this writer owns them
/// (<c>doClose</c>). Both streams are always attempted; if closing the fields
/// stream fails with an IOException, that first exception is the one rethrown.
/// </summary>
public void Dispose()
{
    // Move to protected method if class becomes unsealed
    if (!doClose)
    {
        return;
    }

    try
    {
        if (fieldsStream != null)
        {
            try
            {
                fieldsStream.Close();
            }
            finally
            {
                fieldsStream = null;
            }
        }
    }
    catch (System.IO.IOException)
    {
        // Still close the index stream, but never let its failure replace the
        // exception already in flight.
        try
        {
            if (indexStream != null)
            {
                try
                {
                    indexStream.Close();
                }
                finally
                {
                    indexStream = null;
                }
            }
        }
        catch (System.IO.IOException)
        {
            // Ignore so we throw only first IOException hit
        }
        throw;
    }
    finally
    {
        // Normal path (or a non-IOException failure above): the index stream has
        // not been handled yet. After the catch block it is already null.
        if (indexStream != null)
        {
            try
            {
                indexStream.Close();
            }
            finally
            {
                indexStream = null;
            }
        }
    }
}
/// <summary>Replaces the output that field data is written to.</summary>
/// <param name="stream">the new fields stream</param>
internal void SetFieldsStream(IndexOutput stream)
{
    fieldsStream = stream;
}
/// <summary>
/// Creates a writer over outputs supplied by the caller. <c>doClose</c> is left
/// false, so <c>Dispose</c> will not close these streams.
/// </summary>
/// <param name="fdx">index stream</param>
/// <param name="fdt">fields stream</param>
/// <param name="fn">field infos stored on the writer</param>
internal FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn)
{
    doClose = false;

    indexStream = fdx;
    fieldsStream = fdt;
    fieldInfos = fn;
}
/// <summary>
/// Serialises all field infos: the format as a VInt, the number of fields as a
/// VInt, then for every field its name followed by one byte of option flags.
/// </summary>
/// <param name="output">destination output</param>
public void Write(IndexOutput output)
{
    output.WriteVInt(CURRENT_FORMAT);
    output.WriteVInt(Size());

    int index = 0;
    while (index < Size())
    {
        FieldInfo fi = FieldInfo(index);

        // Pack the per-field options into a single flag byte.
        byte flags = 0;
        if (fi.isIndexed)
        {
            flags |= IS_INDEXED;
        }
        if (fi.storeTermVector)
        {
            flags |= STORE_TERMVECTOR;
        }
        if (fi.storePositionWithTermVector)
        {
            flags |= STORE_POSITIONS_WITH_TERMVECTOR;
        }
        if (fi.storeOffsetWithTermVector)
        {
            flags |= STORE_OFFSET_WITH_TERMVECTOR;
        }
        if (fi.omitNorms)
        {
            flags |= OMIT_NORMS;
        }
        if (fi.storePayloads)
        {
            flags |= STORE_PAYLOADS;
        }
        if (fi.omitTermFreqAndPositions)
        {
            flags |= OMIT_TERM_FREQ_AND_POSITIONS;
        }

        output.WriteString(fi.name);
        output.WriteByte(flags);

        index++;
    }
}
/// <summary>Replaces the proximity output used by this writer.</summary>
/// <param name="proxOutput">the new proximity output</param>
internal virtual void SetProxOutput(IndexOutput proxOutput)
{
    this.proxOutput = proxOutput;
}
/// <summary>
/// Best-effort teardown: closes whichever of the three term vector outputs are
/// open, ignoring any failure while closing, clears the references and resets
/// <c>lastDocID</c>.
/// </summary>
public override void Abort()
{
    // Snapshot in the order they are closed (tvx, tvd, tvf), then clear the fields.
    IndexOutput[] openStreams = new IndexOutput[] { tvx, tvd, tvf };
    tvx = null;
    tvd = null;
    tvf = null;

    foreach (IndexOutput stream in openStreams)
    {
        if (stream == null)
        {
            continue;
        }

        try
        {
            stream.Close();
        }
        catch (System.Exception)
        {
            // Deliberately ignored: abort must not fail because a close failed.
        }
    }

    lastDocID = 0;
}
/// <summary>
/// Creates the fields file and the fields index file for <paramref name="segment"/>
/// and writes the current format marker to each. The writer owns both streams and
/// closes them in <c>Dispose</c>.
/// </summary>
/// <param name="d">directory in which both files are created</param>
/// <param name="segment">segment name used as the file name prefix</param>
/// <param name="fn">field infos stored on the writer</param>
/// <remarks>
/// If either file cannot be created or initialised, the streams opened so far are
/// closed, the files created so far are deleted (both best-effort) and the
/// original exception propagates.
/// </remarks>
internal FieldsWriter(Directory d, System.String segment, FieldInfos fn)
{
    fieldInfos = fn;

    // Dispose() only closes the streams when doClose is set. It must therefore be
    // set before anything is opened; otherwise the cleanup calls below do nothing,
    // the streams leak, and DeleteFile runs against files that are still open.
    doClose = true;

    bool success = false;
    String fieldsName = segment + "." + IndexFileNames.FIELDS_EXTENSION;
    try
    {
        fieldsStream = d.CreateOutput(fieldsName);
        fieldsStream.WriteInt(FORMAT_CURRENT);
        success = true;
    }
    finally
    {
        if (!success)
        {
            try
            {
                Dispose();
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception
            }
            try
            {
                d.DeleteFile(fieldsName);
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception
            }
        }
    }

    success = false;
    String indexName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
    try
    {
        indexStream = d.CreateOutput(indexName);
        indexStream.WriteInt(FORMAT_CURRENT);
        success = true;
    }
    finally
    {
        if (!success)
        {
            try
            {
                Dispose();
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception. Catching
                // only IOException here could let another failure replace it and
                // skip the deletions below.
            }
            try
            {
                d.DeleteFile(fieldsName);
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception
            }
            try
            {
                d.DeleteFile(indexName);
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception
            }
        }
    }
}
/// <summary>
/// Wraps <paramref name="main"/> and creates a fresh CRC32 digest for it.
/// </summary>
/// <param name="main">the wrapped output</param>
public ChecksumIndexOutput(IndexOutput main)
{
    digest = new CRC32();
    this.main = main;
}
/// <summary>
/// Saves this segment's info to <paramref name="output"/>. Fields are written in
/// a fixed order; the doc store segment name and compound flag are only written
/// when <c>docStoreOffset</c> is not -1.
/// </summary>
/// <param name="output">destination output</param>
internal void Write(IndexOutput output)
{
    output.WriteString(name);
    output.WriteInt(docCount);
    output.WriteLong(delGen);

    output.WriteInt(docStoreOffset);
    bool hasDocStoreFields = docStoreOffset != -1;
    if (hasDocStoreFields)
    {
        output.WriteString(docStoreSegment);
        output.WriteByte((byte) (docStoreIsCompoundFile ? 1 : 0));
    }

    output.WriteByte((byte) (hasSingleNormFile ? 1 : 0));

    if (normGen != null)
    {
        // Length first, then one long per entry.
        output.WriteInt(normGen.Length);
        foreach (long gen in normGen)
        {
            output.WriteLong(gen);
        }
    }
    else
    {
        output.WriteInt(NO);
    }

    output.WriteByte((byte) isCompoundFile);
    output.WriteInt(delCount);
    output.WriteByte((byte) (hasProx ? 1 : 0));
    output.WriteStringStringMap(diagnostics);
}
/// <summary>
/// Writes the vector as a plain bit set: the size, the count, then every byte of
/// the backing array.
/// </summary>
/// <param name="output">destination output</param>
private void WriteBits(IndexOutput output)
{
    int size = Size();
    output.WriteInt(size);   // write size

    int count = Count();
    output.WriteInt(count);  // write count

    output.WriteBytes(bits, bits.Length);
}