/// <summary>
/// Adds a new &lt;&lt;fieldNumber, termText&gt;, TermInfo&gt; pair to the set.
/// Term must be lexicographically greater than all previous Terms added.
/// TermInfo pointers must not be smaller than those of the previous entry.
/// These ordering rules are only verified through debug assertions.
/// </summary>
/// <param name="fieldNumber">number of the field the term belongs to</param>
/// <param name="termText">buffer holding the characters of the term</param>
/// <param name="termTextStart">index of the first term character inside <paramref name="termText"/></param>
/// <param name="termTextLength">number of term characters to use</param>
/// <param name="ti">the TermInfo (doc freq, freq/prox pointers, skip offset) to write for the term</param>
internal void Add(int fieldNumber, char[] termText, int termTextStart, int termTextLength, TermInfo ti)
{
    // The messages are built inside the Assert calls on purpose: they are only
    // evaluated when assertions are compiled in.
    System.Diagnostics.Debug.Assert(
        CompareToLastTerm(fieldNumber, termText, termTextStart, termTextLength) < 0
            || (isIndex && termTextLength == 0 && lastTermTextLength == 0),
        "Terms are out of order: field=" + fieldInfos.FieldName(fieldNumber) + "(number " + fieldNumber + ")"
            + " lastField=" + fieldInfos.FieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")"
            + " text=" + new String(termText, termTextStart, termTextLength)
            + " lastText=" + new String(lastTermText, 0, lastTermTextLength));
    System.Diagnostics.Debug.Assert(
        ti.freqPointer >= lastTi.freqPointer,
        "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
    System.Diagnostics.Debug.Assert(
        ti.proxPointer >= lastTi.proxPointer,
        "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");

    // Every indexInterval-th entry of the main writer also hands the previous
    // term to the companion writer as an index term.
    bool emitIndexTerm = !isIndex && size % indexInterval == 0;
    if (emitIndexTerm)
    {
        other.Add(lastFieldNumber, lastTermText, 0, lastTermTextLength, lastTi);
    }

    // Term first, then its TermInfo with pointers stored as deltas against the previous entry.
    WriteTerm(fieldNumber, termText, termTextStart, termTextLength);
    output.WriteVInt(ti.docFreq);
    long freqPointerDelta = ti.freqPointer - lastTi.freqPointer;
    long proxPointerDelta = ti.proxPointer - lastTi.proxPointer;
    output.WriteVLong(freqPointerDelta);
    output.WriteVLong(proxPointerDelta);

    // The skip offset is only written once the doc freq reaches skipInterval.
    if (ti.docFreq >= skipInterval)
    {
        output.WriteVInt(ti.skipOffset);
    }

    // The index writer additionally records where the entry lives in the other writer's output.
    if (isIndex)
    {
        output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
        lastIndexPointer = other.output.GetFilePointer();
    }

    // Remember this term as the "last" one, growing the buffer when it is too small.
    if (termTextLength > lastTermText.Length)
    {
        int grownCapacity = (int)(termTextLength * 1.25);
        lastTermText = new char[grownCapacity];
    }
    Array.Copy(termText, termTextStart, lastTermText, 0, termTextLength);
    lastTermTextLength = termTextLength;
    lastFieldNumber = fieldNumber;

    lastTi.Set(ti);
    size++;
}
/// <summary>
/// Adds a new &lt;Term, TermInfo&gt; pair to the set.
/// Term must be lexicographically greater than all previous Terms added
/// (this check is skipped when this writer is the index writer).
/// TermInfo pointers must not be smaller than those of the previous entry.
/// </summary>
/// <param name="term">the term to append</param>
/// <param name="ti">the TermInfo (doc freq, freq/prox pointers, skip offset) to write for the term</param>
/// <exception cref="System.IO.IOException">
/// Thrown when the term, the freqPointer or the proxPointer is out of order.
/// </exception>
public /*internal*/ void Add(Term term, TermInfo ti)
{
    // Validate ordering before anything is written.
    if (!isIndex)
    {
        if (term.CompareTo(lastTerm) <= 0)
        {
            throw new System.IO.IOException("term out of order");
        }
    }
    if (lastTi.freqPointer > ti.freqPointer)
    {
        throw new System.IO.IOException("freqPointer out of order");
    }
    if (lastTi.proxPointer > ti.proxPointer)
    {
        throw new System.IO.IOException("proxPointer out of order");
    }

    // Every indexInterval-th entry of the main writer also hands the previous
    // term to the companion writer as an index term.
    bool emitIndexTerm = !isIndex && size % indexInterval == 0;
    if (emitIndexTerm)
    {
        other.Add(lastTerm, lastTi);
    }

    // Term first, then its TermInfo with pointers stored as deltas against the previous entry.
    WriteTerm(term);
    output.WriteVInt(ti.docFreq);
    long freqPointerDelta = ti.freqPointer - lastTi.freqPointer;
    long proxPointerDelta = ti.proxPointer - lastTi.proxPointer;
    output.WriteVLong(freqPointerDelta);
    output.WriteVLong(proxPointerDelta);

    // The skip offset is only written once the doc freq reaches skipInterval.
    if (ti.docFreq >= skipInterval)
    {
        output.WriteVInt(ti.skipOffset);
    }

    // The index writer additionally records where the entry lives in the other writer's output.
    if (isIndex)
    {
        output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
        lastIndexPointer = other.output.GetFilePointer();
    }

    lastTi.Set(ti);
    size++;
}
/// <summary>
/// Adds a new &lt;&lt;fieldNumber, termBytes&gt;, TermInfo&gt; pair to the set.
/// Term must be lexicographically greater than all previous Terms added.
/// TermInfo pointers must not be smaller than those of the previous entry.
/// These ordering rules are only verified through debug assertions.
/// </summary>
/// <param name="fieldNumber">number of the field the term belongs to</param>
/// <param name="termBytes">buffer holding the term bytes (decoded as UTF-8 in the assertion message)</param>
/// <param name="termBytesLength">number of bytes of <paramref name="termBytes"/> that make up the term</param>
/// <param name="ti">the TermInfo (doc freq, freq/prox pointers, skip offset) to write for the term</param>
internal void Add(int fieldNumber, byte[] termBytes, int termBytesLength, TermInfo ti)
{
    // The messages are built inside the Assert calls on purpose: they are only
    // evaluated when assertions are compiled in.
    System.Diagnostics.Debug.Assert(
        CompareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0
            || (isIndex && termBytesLength == 0 && lastTermBytesLength == 0),
        "Terms are out of order: field=" + fieldInfos.FieldName(fieldNumber) + " (number " + fieldNumber + ")"
            + " lastField=" + fieldInfos.FieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")"
            + " text=" + System.Text.Encoding.UTF8.GetString(termBytes, 0, termBytesLength)
            + " lastText=" + System.Text.Encoding.UTF8.GetString(lastTermBytes, 0, lastTermBytesLength));
    System.Diagnostics.Debug.Assert(
        ti.freqPointer >= lastTi.freqPointer,
        "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
    System.Diagnostics.Debug.Assert(
        ti.proxPointer >= lastTi.proxPointer,
        "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");

    // Every indexInterval-th entry of the main writer also hands the previous
    // term to the companion writer as an index term.
    bool emitIndexTerm = !isIndex && size % indexInterval == 0;
    if (emitIndexTerm)
    {
        other.Add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi);
    }

    // Term first, then its TermInfo with pointers stored as deltas against the previous entry.
    WriteTerm(fieldNumber, termBytes, termBytesLength);
    output.WriteVInt(ti.docFreq);
    long freqPointerDelta = ti.freqPointer - lastTi.freqPointer;
    long proxPointerDelta = ti.proxPointer - lastTi.proxPointer;
    output.WriteVLong(freqPointerDelta);
    output.WriteVLong(proxPointerDelta);

    // The skip offset is only written once the doc freq reaches skipInterval.
    if (ti.docFreq >= skipInterval)
    {
        output.WriteVInt(ti.skipOffset);
    }

    // The index writer additionally records where the entry lives in the other writer's output.
    if (isIndex)
    {
        output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
        lastIndexPointer = other.output.GetFilePointer();
    }

    lastFieldNumber = fieldNumber;
    lastTi.Set(ti);
    size++;
}
/// <summary>
/// Appends the buffered term vectors of one document to the tvx/tvd/tvf outputs,
/// then resets the per-document state and frees it.
/// </summary>
/// <param name="perDoc">
/// per-document state: doc ID, number of vector fields, their field numbers,
/// their pointers and the buffered tvf data
/// </param>
internal void FinishDocument(PerDoc perDoc)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument start"));

        InitTermVectorsWriter();
        Fill(perDoc.docID);

        // Append term vectors to the real outputs: the index entry records the
        // current positions of tvd and tvf.
        tvx.WriteLong(tvd.FilePointer);
        tvx.WriteLong(tvf.FilePointer);

        int vectorFieldCount = perDoc.numVectorFields;
        tvd.WriteVInt(vectorFieldCount);

        if (vectorFieldCount > 0)
        {
            // Field numbers of every vector field.
            int fieldIndex = 0;
            while (fieldIndex < vectorFieldCount)
            {
                tvd.WriteVInt(perDoc.fieldNumbers[fieldIndex]);
                fieldIndex++;
            }

            System.Diagnostics.Debug.Assert(0 == perDoc.fieldPointers [0]);

            // Field pointers are stored as deltas to the preceding field's
            // pointer; nothing is written for the first field.
            long previousPointer = perDoc.fieldPointers[0];
            int pointerIndex = 1;
            while (pointerIndex < vectorFieldCount)
            {
                long currentPointer = perDoc.fieldPointers[pointerIndex];
                tvd.WriteVLong(currentPointer - previousPointer);
                previousPointer = currentPointer;
                pointerIndex++;
            }

            // Copy the buffered per-document tvf data into the real tvf output.
            perDoc.perDocTvf.WriteTo(tvf);
            perDoc.numVectorFields = 0;
        }

        System.Diagnostics.Debug.Assert(lastDocID == perDoc.docID + docWriter.DocStoreOffset);
        lastDocID++;

        perDoc.Reset();
        Free(perDoc);

        System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument end"));
    }
}
/// <summary>
/// Writes the buffered skip lists to the given output, highest level first.
/// Every level above 0 that holds data is preceded by its length; level 0 is
/// written last without a length prefix.
/// </summary>
/// <param name="output">the IndexOutput the skip lists shall be written to</param>
/// <returns>the file pointer of <paramref name="output"/> at which the skip list starts</returns>
internal virtual long WriteSkip(IndexOutput output)
{
    long startOfSkipData = output.FilePointer;

    bool nothingBuffered = skipBuffer == null || skipBuffer.Length == 0;
    if (nothingBuffered)
    {
        return startOfSkipData;
    }

    int level = numberOfSkipLevels - 1;
    while (level > 0)
    {
        long levelLength = skipBuffer[level].FilePointer;
        if (levelLength > 0)
        {
            output.WriteVLong(levelLength);
            skipBuffer[level].WriteTo(output);
        }
        level--;
    }

    skipBuffer[0].WriteTo(output);
    return startOfSkipData;
}
/// <summary>
/// Writes the buffered skip lists to the given output. Levels are emitted from
/// the top level down to level 1, each non-empty one prefixed with its length,
/// followed by level 0 without a prefix.
/// </summary>
/// <param name="output">the IndexOutput the skip lists shall be written to</param>
/// <returns>the file pointer of <paramref name="output"/> at which the skip list starts</returns>
internal virtual long WriteSkip(IndexOutput output)
{
    long skipStart = output.FilePointer;

    if (skipBuffer == null || skipBuffer.Length == 0)
    {
        // Nothing was buffered, so nothing is written.
        return skipStart;
    }

    for (int upperLevel = numberOfSkipLevels - 1; upperLevel >= 1; upperLevel--)
    {
        long bytesBuffered = skipBuffer[upperLevel].FilePointer;
        if (bytesBuffered <= 0)
        {
            // Empty upper levels are skipped entirely (no length prefix either).
            continue;
        }

        output.WriteVLong(bytesBuffered);
        skipBuffer[upperLevel].WriteTo(output);
    }

    skipBuffer[0].WriteTo(output);

    return skipStart;
}
/// <summary>
/// Writes the buffered skip lists to the given output: first the levels above 0
/// in descending order (each non-empty level preceded by its length), then
/// level 0 without a length prefix.
/// </summary>
/// <param name="output"> the IndexOutput the skip lists shall be written to </param>
/// <returns> the file pointer of <paramref name="output"/> at which the skip list starts </returns>
public virtual long WriteSkip(IndexOutput output)
{
    long startPointer = output.FilePointer;

    bool hasBuffers = SkipBuffer != null && SkipBuffer.Length != 0;
    if (!hasBuffers)
    {
        return startPointer;
    }

    int level = NumberOfSkipLevels - 1;
    while (level > 0)
    {
        var levelBuffer = SkipBuffer[level];
        long bufferedLength = levelBuffer.FilePointer;
        if (bufferedLength > 0)
        {
            output.WriteVLong(bufferedLength);
            levelBuffer.WriteTo(output);
        }
        level--;
    }

    SkipBuffer[0].WriteTo(output);
    return startPointer;
}
// NOTE: 4.0 file format docs are crazy/wrong here...
//
// Writes a BYTES_VAR_STRAIGHT doc-values field:
//   - data:  codec header, then the bytes of every non-null value back to back;
//   - index: codec header, the total number of value bytes as a VLong, then
//            maxDoc + 1 packed start addresses (one per value plus a closing sentinel).
// The values sequence is enumerated twice: once for the bytes, once for the addresses.
private void AddVarStraightBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_STRAIGHT.Name);

    CodecUtil.WriteHeader(
        data,
        Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT,
        Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
    CodecUtil.WriteHeader(
        index,
        Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX,
        Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);

    // ---- values ----
    long dataStart = data.FilePointer;
    foreach (BytesRef value in values)
    {
        if (value == null)
        {
            // Null values contribute no bytes.
            continue;
        }
        data.WriteBytes(value.Bytes, value.Offset, value.Length);
    }

    // ---- addresses ----
    long maxAddress = data.FilePointer - dataStart;
    index.WriteVLong(maxAddress);

    int maxDoc = State.SegmentInfo.DocCount;
    Debug.Assert(maxDoc != int.MaxValue); // unsupported by the 4.0 impl

    var bitsPerAddress = PackedInts.BitsRequired(maxAddress);
    PackedInts.Writer addressWriter = PackedInts.GetWriter(index, maxDoc + 1, bitsPerAddress, PackedInts.DEFAULT);

    long nextAddress = 0;
    foreach (BytesRef value in values)
    {
        // Each entry records where the value starts; a null value leaves the address unchanged.
        addressWriter.Add(nextAddress);
        nextAddress += (value != null) ? value.Length : 0;
    }

    // write sentinel: the final address must equal the number of bytes written
    Debug.Assert(nextAddress == maxAddress);
    addressWriter.Add(nextAddress);
    addressWriter.Finish();
}
/// <summary>
/// Add a complete document specified by all its term vectors. If document has no
/// term vectors, add value for tvx (and a field count of 0 to tvd).
/// </summary>
/// <param name="vectors">the term vectors of the document, one per field; may be null</param>
/// <exception cref="System.SystemException">
/// Thrown when positions or offsets are being stored for a field but are null for one of its terms.
/// </exception>
public void AddAllDocVectors(ITermFreqVector[] vectors)
{
    // The index entry records where this document starts in tvd and tvf.
    tvx.WriteLong(tvd.FilePointer);
    tvx.WriteLong(tvf.FilePointer);

    if (vectors == null)
    {
        tvd.WriteVInt(0);
        return;
    }

    int numFields = vectors.Length;
    tvd.WriteVInt(numFields);
    long[] fieldPointers = new long[numFields];

    for (int fieldIndex = 0; fieldIndex < numFields; fieldIndex++)
    {
        ITermFreqVector vector = vectors[fieldIndex];
        fieldPointers[fieldIndex] = tvf.FilePointer;

        // 1st pass: write field numbers to tvd
        tvd.WriteVInt(fieldInfos.FieldNumber(vector.Field));

        int numTerms = vector.Size;
        tvf.WriteVInt(numTerms);

        // Only a TermPositionVector may have positions & offsets; whether they
        // are stored is decided by looking at the first term.
        TermPositionVector tpVector = vector as TermPositionVector;
        bool storePositions = false;
        bool storeOffsets = false;
        byte bits = 0;
        if (tpVector != null)
        {
            storePositions = tpVector.Size > 0 && tpVector.GetTermPositions(0) != null;
            storeOffsets = tpVector.Size > 0 && tpVector.GetOffsets(0) != null;
            bits = (byte)((storePositions ? TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR : (byte)0)
                        + (storeOffsets ? TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR : (byte)0));
        }
        tvf.WriteVInt(bits);

        string[] terms = vector.GetTerms();
        int[] freqs = vector.GetTermFrequencies();

        // Two UTF-8 buffers are used alternately so that each term can be
        // compared against the previous one; the "previous" buffer starts empty.
        int utf8Upto = 0;
        utf8Results[1].length = 0;

        for (int termIndex = 0; termIndex < numTerms; termIndex++)
        {
            var current = utf8Results[utf8Upto];
            var previous = utf8Results[1 - utf8Upto];
            string termText = terms[termIndex];

            UnicodeUtil.UTF16toUTF8(termText, 0, termText.Length, current);

            int sharedPrefix = StringHelper.BytesDifference(previous.result, previous.length, current.result, current.length);
            int suffixLength = current.length - sharedPrefix;
            tvf.WriteVInt(sharedPrefix);                                // write shared prefix length
            tvf.WriteVInt(suffixLength);                                // write delta length
            tvf.WriteBytes(current.result, sharedPrefix, suffixLength); // write delta bytes
            utf8Upto = 1 - utf8Upto;

            int termFreq = freqs[termIndex];
            tvf.WriteVInt(termFreq);

            if (storePositions)
            {
                int[] positions = tpVector.GetTermPositions(termIndex);
                if (positions == null)
                {
                    throw new System.SystemException("Trying to write positions that are null!");
                }
                System.Diagnostics.Debug.Assert(positions.Length == termFreq);

                // use delta encoding for positions
                int previousPosition = 0;
                for (int p = 0; p < positions.Length; p++)
                {
                    int position = positions[p];
                    tvf.WriteVInt(position - previousPosition);
                    previousPosition = position;
                }
            }

            if (storeOffsets)
            {
                TermVectorOffsetInfo[] offsets = tpVector.GetOffsets(termIndex);
                if (offsets == null)
                {
                    throw new System.SystemException("Trying to write offsets that are null!");
                }
                System.Diagnostics.Debug.Assert(offsets.Length == termFreq);

                // use delta encoding for offsets: start relative to the previous
                // end offset, end relative to its own start
                int previousEndOffset = 0;
                for (int o = 0; o < offsets.Length; o++)
                {
                    TermVectorOffsetInfo offsetInfo = offsets[o];
                    int startOffset = offsetInfo.StartOffset;
                    int endOffset = offsetInfo.EndOffset;
                    tvf.WriteVInt(startOffset - previousEndOffset);
                    tvf.WriteVInt(endOffset - startOffset);
                    previousEndOffset = endOffset;
                }
            }
        }
    }

    // 2nd pass: write field pointers to tvd, each as a delta to the preceding
    // field's pointer (nothing is written for the first field)
    for (int fieldIndex = 1; fieldIndex < numFields; fieldIndex++)
    {
        tvd.WriteVLong(fieldPointers[fieldIndex] - fieldPointers[fieldIndex - 1]);
    }
}