/// <summary> Save this segment's info.</summary> internal void Write(IndexOutput output) { output.WriteString(name); output.WriteInt(docCount); output.WriteLong(delGen); output.WriteInt(docStoreOffset); if (docStoreOffset != -1) { output.WriteString(docStoreSegment); output.WriteByte((byte)(docStoreIsCompoundFile?1:0)); } output.WriteByte((byte)(hasSingleNormFile?1:0)); if (normGen == null) { output.WriteInt(NO); } else { output.WriteInt(normGen.Length); for (int j = 0; j < normGen.Length; j++) { output.WriteLong(normGen[j]); } } output.WriteByte((byte)isCompoundFile); output.WriteInt(delCount); output.WriteByte((byte)(hasProx?1:0)); output.WriteStringStringMap(diagnostics); }
/// <summary>Fills in no-term-vectors for all docs we haven't seen /// since the last doc that had term vectors. /// </summary> internal void Fill(int docID) { int docStoreOffset = docWriter.GetDocStoreOffset(); int end = docID + docStoreOffset; if (lastDocID < end) { long tvfPosition = tvf.GetFilePointer(); while (lastDocID < end) { tvx.WriteLong(tvd.GetFilePointer()); tvd.WriteVInt(0); tvx.WriteLong(tvfPosition); lastDocID++; } } }
/// <summary>Called to complete TermInfos creation. </summary> internal void Close() { output.Seek(4); // write size after format output.WriteLong(size); output.Close(); if (!isIndex) { other.Close(); } }
private void Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi) { indexInterval = interval; fieldInfos = fis; isIndex = isi; output = directory.CreateOutput(segment + (isIndex?".tii":".tis")); output.WriteInt(FORMAT_CURRENT); // write format output.WriteLong(0); // leave space for size output.WriteInt(indexInterval); // write indexInterval output.WriteInt(skipInterval); // write skipInterval output.WriteInt(maxSkipLevels); // write maxSkipLevels System.Diagnostics.Debug.Assert(InitUTF16Results()); }
// Writes the contents of buffer into the fields stream // and adds a new entry for this document into the index // stream. This assumes the buffer was already written // in the correct fields format. internal void FlushDocument(int numStoredFields, RAMOutputStream buffer) { indexStream.WriteLong(fieldsStream.GetFilePointer()); fieldsStream.WriteVInt(numStoredFields); buffer.WriteTo(fieldsStream); }
/// <summary> Save this segment's info.</summary> internal void Write(IndexOutput output) { output.WriteString(name); output.WriteInt(docCount); output.WriteLong(delGen); output.WriteInt(docStoreOffset); if (docStoreOffset != - 1) { output.WriteString(docStoreSegment); output.WriteByte((byte) (docStoreIsCompoundFile?1:0)); } output.WriteByte((byte) (hasSingleNormFile?1:0)); if (normGen == null) { output.WriteInt(NO); } else { output.WriteInt(normGen.Length); for (int j = 0; j < normGen.Length; j++) { output.WriteLong(normGen[j]); } } output.WriteByte((byte) isCompoundFile); output.WriteInt(delCount); output.WriteByte((byte) (hasProx?1:0)); output.WriteStringStringMap(diagnostics); }
/// <summary> Add a complete document specified by all its term vectors. If document has no /// term vectors, add value for tvx. /// /// </summary> /// <param name="vectors"> /// </param> /// <throws> IOException </throws> public void AddAllDocVectors(TermFreqVector[] vectors) { tvx.WriteLong(tvd.GetFilePointer()); tvx.WriteLong(tvf.GetFilePointer()); if (vectors != null) { int numFields = vectors.Length; tvd.WriteVInt(numFields); long[] fieldPointers = new long[numFields]; for (int i = 0; i < numFields; i++) { fieldPointers[i] = tvf.GetFilePointer(); int fieldNumber = fieldInfos.FieldNumber(vectors[i].GetField()); // 1st pass: write field numbers to tvd tvd.WriteVInt(fieldNumber); int numTerms = vectors[i].Size(); tvf.WriteVInt(numTerms); TermPositionVector tpVector; byte bits; bool storePositions; bool storeOffsets; if (vectors[i] is TermPositionVector) { // May have positions & offsets tpVector = (TermPositionVector)vectors[i]; storePositions = tpVector.Size() > 0 && tpVector.GetTermPositions(0) != null; storeOffsets = tpVector.Size() > 0 && tpVector.GetOffsets(0) != null; bits = (byte)((storePositions?TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR: (byte)0) + (storeOffsets?TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR: (byte)0)); } else { tpVector = null; bits = 0; storePositions = false; storeOffsets = false; } tvf.WriteVInt(bits); System.String[] terms = vectors[i].GetTerms(); int[] freqs = vectors[i].GetTermFrequencies(); int utf8Upto = 0; utf8Results[1].length = 0; for (int j = 0; j < numTerms; j++) { UnicodeUtil.UTF16toUTF8(terms[j], 0, terms[j].Length, utf8Results[utf8Upto]); int start = StringHelper.BytesDifference(utf8Results[1 - utf8Upto].result, utf8Results[1 - utf8Upto].length, utf8Results[utf8Upto].result, utf8Results[utf8Upto].length); int length = utf8Results[utf8Upto].length - start; tvf.WriteVInt(start); // write shared prefix length tvf.WriteVInt(length); // write delta length tvf.WriteBytes(utf8Results[utf8Upto].result, start, length); // write delta bytes utf8Upto = 1 - utf8Upto; int termFreq = freqs[j]; tvf.WriteVInt(termFreq); if (storePositions) { int[] positions = tpVector.GetTermPositions(j); if (positions == null) { throw new System.SystemException("Trying to write positions that are null!"); } System.Diagnostics.Debug.Assert(positions.Length == termFreq); // use delta encoding for positions int lastPosition = 0; for (int k = 0; k < positions.Length; k++) { int position = positions[k]; tvf.WriteVInt(position - lastPosition); lastPosition = position; } } if (storeOffsets) { TermVectorOffsetInfo[] offsets = tpVector.GetOffsets(j); if (offsets == null) { throw new System.SystemException("Trying to write offsets that are null!"); } System.Diagnostics.Debug.Assert(offsets.Length == termFreq); // use delta encoding for offsets int lastEndOffset = 0; for (int k = 0; k < offsets.Length; k++) { int startOffset = offsets[k].GetStartOffset(); int endOffset = offsets[k].GetEndOffset(); tvf.WriteVInt(startOffset - lastEndOffset); tvf.WriteVInt(endOffset - startOffset); lastEndOffset = endOffset; } } } } // 2nd pass: write field pointers to tvd if (numFields > 1) { long lastFieldPointer = fieldPointers[0]; for (int i = 1; i < numFields; i++) { long fieldPointer = fieldPointers[i]; tvd.WriteVLong(fieldPointer - lastFieldPointer); lastFieldPointer = fieldPointer; } } } else { tvd.WriteVInt(0); } }
/// <summary>Merge files with the extensions added up to now. /// All files with these extensions are combined sequentially into the /// compound stream. After successful merge, the source files /// are deleted. /// </summary> /// <throws> IllegalStateException if close() had been called before or </throws> /// <summary> if no file has been added to this object /// </summary> public void Close() { if (merged) { throw new System.SystemException("Merge already performed"); } if ((entries.Count == 0)) { throw new System.SystemException("No entries to merge have been defined"); } merged = true; // open the compound stream IndexOutput os = null; try { os = directory.CreateOutput(fileName); // Write the number of entries os.WriteVInt(entries.Count); // Write the directory with all offsets at 0. // Remember the positions of directory entries so that we can // adjust the offsets later System.Collections.IEnumerator it = entries.GetEnumerator(); long totalSize = 0; while (it.MoveNext()) { FileEntry fe = (FileEntry)it.Current; fe.directoryOffset = os.GetFilePointer(); os.WriteLong(0); // for now os.WriteString(fe.file); totalSize += directory.FileLength(fe.file); } // Pre-allocate size of file as optimization -- // this can potentially help IO performance as // we write the file and also later during // searching. It also uncovers a disk-full // situation earlier and hopefully without // actually filling disk to 100%: long finalLength = totalSize + os.GetFilePointer(); os.SetLength(finalLength); // Open the files and copy their data into the stream. // Remember the locations of each file's data section. byte[] buffer = new byte[16384]; it = entries.GetEnumerator(); while (it.MoveNext()) { FileEntry fe = (FileEntry)it.Current; fe.dataOffset = os.GetFilePointer(); CopyFile(fe, os, buffer); } // Write the data offsets into the directory of the compound stream it = entries.GetEnumerator(); while (it.MoveNext()) { FileEntry fe = (FileEntry)it.Current; os.Seek(fe.directoryOffset); os.WriteLong(fe.dataOffset); } System.Diagnostics.Debug.Assert(finalLength == os.Length()); // Close the output stream. Set the os to null before trying to // close so that if an exception occurs during the close, the // finally clause below will not attempt to close the stream // the second time. IndexOutput tmp = os; os = null; tmp.Close(); } finally { if (os != null) { try { os.Close(); } catch (System.IO.IOException e) { } } } }
internal void FinishCommit(Directory dir) { if (pendingSegnOutput == null) { throw new System.SystemException("prepareCommit was not called"); } bool success = false; try { pendingSegnOutput.FinishCommit(); pendingSegnOutput.Close(); pendingSegnOutput = null; success = true; } finally { if (!success) { RollbackCommit(dir); } } // NOTE: if we crash here, we have left a segments_N // file in the directory in a possibly corrupt state (if // some bytes made it to stable storage and others // didn't). But, the segments_N file includes checksum // at the end, which should catch this case. So when a // reader tries to read it, it will throw a // CorruptIndexException, which should cause the retry // logic in SegmentInfos to kick in and load the last // good (previous) segments_N-1 file. System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation); success = false; try { dir.Sync(fileName); success = true; } finally { if (!success) { try { dir.DeleteFile(fileName); } catch (System.Exception t) { // Suppress so we keep throwing the original exception } } } lastGeneration = generation; try { IndexOutput genOutput = dir.CreateOutput(IndexFileNames.SEGMENTS_GEN); try { genOutput.WriteInt(FORMAT_LOCKLESS); genOutput.WriteLong(generation); genOutput.WriteLong(generation); } finally { genOutput.Close(); } } catch (System.Exception t) { // It's OK if we fail to write this file since it's // used only as one of the retry fallbacks. } }