/// <summary>
/// Writes a single term to <c>output</c>, prefix-compressed against the term
/// written before it: shared prefix length, suffix length, suffix characters,
/// and finally the number of the term's field as reported by <c>fieldInfos</c>.
/// </summary>
/// <param name="term">The term to write; it is remembered as <c>lastTerm</c> afterwards.</param>
private void WriteTerm(Term term)
{
    // Number of leading chars this term has in common with the previous one.
    int sharedPrefix = StringHelper.StringDifference(lastTerm.text, term.text);
    // Number of trailing chars that still have to be written out.
    int suffixLength = term.text.Length - sharedPrefix;

    output.WriteVInt(sharedPrefix);
    output.WriteVInt(suffixLength);
    output.WriteChars(term.text, sharedPrefix, suffixLength);

    // The field number is resolved only after the text has been written,
    // keeping the same call order as before.
    int fieldNumber = fieldInfos.FieldNumber(term.field);
    output.WriteVInt(fieldNumber);

    // The next term will be delta-encoded against this one.
    lastTerm = term;
}
/// <summary>
/// Writes the collected terms of <c>currentField</c> to <c>tvf</c>.
/// The layout written is: term count (VInt), one flag byte saying whether
/// positions and/or offsets follow, then for every term its prefix-compressed
/// text, its frequency and, if enabled, its delta-encoded positions and offsets.
/// The file pointer of <c>tvf</c> at entry is recorded in
/// <c>currentField.tvfPointer</c>.
/// </summary>
/// <exception cref="System.InvalidOperationException">
/// Positions (or offsets) are flagged to be stored on the current field but a
/// term carries a null positions (or offsets) array.
/// </exception>
private void WriteField()
{
    // remember where this field is written
    currentField.tvfPointer = tvf.GetFilePointer();

    int size = terms.Count;
    tvf.WriteVInt(size);

    bool storePositions = currentField.storePositions;
    bool storeOffsets = currentField.storeOffsets;

    // Flag byte: one bit per optional per-term section.
    byte bits = (byte)(0x0);
    if (storePositions)
    {
        bits |= STORE_POSITIONS_WITH_TERMVECTOR;
    }
    if (storeOffsets)
    {
        bits |= STORE_OFFSET_WITH_TERMVECTOR;
    }
    tvf.WriteByte(bits);

    System.String lastTermText = "";
    for (int i = 0; i < size; i++)
    {
        TVTerm term = (TVTerm)terms[i];

        // Term text is written as a delta against the previous term's text.
        int start = StringHelper.StringDifference(lastTermText, term.termText);
        int length = term.termText.Length - start;
        tvf.WriteVInt(start); // write shared prefix length
        tvf.WriteVInt(length); // write delta length
        tvf.WriteChars(term.termText, start, length); // write delta chars
        tvf.WriteVInt(term.freq);
        lastTermText = term.termText;

        if (storePositions)
        {
            if (term.positions == null)
            {
                // InvalidOperationException derives from SystemException, so
                // callers catching the former base type keep working.
                throw new System.InvalidOperationException("Trying to write positions that are null!");
            }

            // use delta encoding for positions
            int position = 0;
            for (int j = 0; j < term.freq; j++)
            {
                tvf.WriteVInt(term.positions[j] - position);
                position = term.positions[j];
            }
        }

        if (storeOffsets)
        {
            if (term.offsets == null)
            {
                throw new System.InvalidOperationException("Trying to write offsets that are null!");
            }

            // use delta encoding for offsets: each start offset is written
            // relative to the previous end offset, each end offset relative
            // to its own start offset.
            int lastEndOffset = 0;
            for (int j = 0; j < term.freq; j++)
            {
                tvf.WriteVInt(term.offsets[j].GetStartOffset() - lastEndOffset);
                tvf.WriteVInt(term.offsets[j].GetEndOffset() - term.offsets[j].GetStartOffset()); // save the diff between the two
                lastEndOffset = term.offsets[j].GetEndOffset();
            }
        }
    }
}
/// <summary>
/// Adds a complete document specified by all its term vectors.
/// The current <c>tvd</c> file pointer is always written to <c>tvx</c>. When
/// <paramref name="vectors"/> is null, a field count of 0 is written to
/// <c>tvd</c> and nothing else. Otherwise the field count and field numbers
/// are written to <c>tvd</c>, each field's terms (with optional positions and
/// offsets) are written to <c>tvf</c>, and finally the delta-encoded
/// <c>tvf</c> start pointers of all fields are written to <c>tvd</c>.
/// </summary>
/// <param name="vectors">
/// The term vectors of the document, one per field, or null if the document
/// has no term vectors.
/// </param>
/// <exception cref="System.InvalidOperationException">
/// A vector was detected as storing positions (or offsets) from its first term,
/// but returns null positions (or offsets) for a later term.
/// </exception>
/// <exception cref="System.IO.IOException">
/// Carried over from the original documentation; presumably raised by the
/// underlying outputs (not visible in this method).
/// </exception>
public void AddAllDocVectors(TermFreqVector[] vectors)
{
    tvx.WriteLong(tvd.GetFilePointer());

    if (vectors != null)
    {
        int numFields = vectors.Length;
        tvd.WriteVInt(numFields);

        long[] fieldPointers = new long[numFields];

        for (int i = 0; i < numFields; i++)
        {
            // Remember where this field's data starts in tvf (used in the 2nd pass).
            fieldPointers[i] = tvf.GetFilePointer();

            int fieldNumber = fieldInfos.FieldNumber(vectors[i].GetField());

            // 1st pass: write field numbers to tvd
            tvd.WriteVInt(fieldNumber);

            int numTerms = vectors[i].Size();
            tvf.WriteVInt(numTerms);

            TermPositionVector tpVector;
            byte bits;
            bool storePositions;
            bool storeOffsets;

            if (vectors[i] is TermPositionVector)
            {
                // May have positions & offsets; the first term decides for the whole field.
                tpVector = (TermPositionVector)vectors[i];
                storePositions = tpVector.Size() > 0 && tpVector.GetTermPositions(0) != null;
                storeOffsets = tpVector.Size() > 0 && tpVector.GetOffsets(0) != null;
                bits = (byte)((storePositions ? TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR : (byte)0) + (storeOffsets ? TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR : (byte)0));
            }
            else
            {
                tpVector = null;
                bits = 0;
                storePositions = false;
                storeOffsets = false;
            }

            tvf.WriteVInt(bits);

            System.String[] terms = vectors[i].GetTerms();
            int[] freqs = vectors[i].GetTermFrequencies();

            System.String lastTermText = "";
            for (int j = 0; j < numTerms; j++)
            {
                // Term text is written as a delta against the previous term's text.
                System.String termText = terms[j];
                int start = StringHelper.StringDifference(lastTermText, termText);
                int length = termText.Length - start;
                tvf.WriteVInt(start); // write shared prefix length
                tvf.WriteVInt(length); // write delta length
                tvf.WriteChars(termText, start, length); // write delta chars
                lastTermText = termText;

                int termFreq = freqs[j];
                tvf.WriteVInt(termFreq);

                if (storePositions)
                {
                    int[] positions = tpVector.GetTermPositions(j);
                    if (positions == null)
                    {
                        // InvalidOperationException derives from SystemException, so
                        // callers catching the former base type keep working.
                        throw new System.InvalidOperationException("Trying to write positions that are null!");
                    }
                    System.Diagnostics.Debug.Assert(positions.Length == termFreq);

                    // use delta encoding for positions
                    int lastPosition = 0;
                    for (int k = 0; k < positions.Length; k++)
                    {
                        int position = positions[k];
                        tvf.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                }

                if (storeOffsets)
                {
                    TermVectorOffsetInfo[] offsets = tpVector.GetOffsets(j);
                    if (offsets == null)
                    {
                        throw new System.InvalidOperationException("Trying to write offsets that are null!");
                    }
                    System.Diagnostics.Debug.Assert(offsets.Length == termFreq);

                    // use delta encoding for offsets: start relative to the previous
                    // end offset, end relative to its own start offset.
                    int lastEndOffset = 0;
                    for (int k = 0; k < offsets.Length; k++)
                    {
                        int startOffset = offsets[k].GetStartOffset();
                        int endOffset = offsets[k].GetEndOffset();
                        tvf.WriteVInt(startOffset - lastEndOffset);
                        tvf.WriteVInt(endOffset - startOffset);
                        lastEndOffset = endOffset;
                    }
                }
            }
        }

        // 2nd pass: write field pointers to tvd
        long lastFieldPointer = 0;
        for (int i = 0; i < numFields; i++)
        {
            long fieldPointer = fieldPointers[i];
            tvd.WriteVLong(fieldPointer - lastFieldPointer);
            lastFieldPointer = fieldPointer;
        }
    }
    else
    {
        // No term vectors for this document: record zero fields.
        tvd.WriteVInt(0);
    }
}