/// <summary>
/// Creates the writer and opens the three term vector outputs (index, documents
/// and fields) for the given segment, writing the current format marker at the
/// start of each one. If any step fails before all three outputs are ready,
/// <c>Abort()</c> is called from the <c>finally</c> block.
/// </summary>
/// <param name="directory">Directory the outputs are created in.</param>
/// <param name="segment">Segment name used to derive the file names.</param>
/// <param name="context">I/O context passed through to <c>CreateOutput</c>.</param>
public PreFlexRWTermVectorsWriter(Directory directory, string segment, IOContext context)
{
    this.Directory = directory;
    this.Segment = segment;

    bool allOutputsReady = false;
    try
    {
        // Term vector index output.
        string indexFileName = IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION);
        Tvx = directory.CreateOutput(indexFileName, context);
        Tvx.WriteInt(Lucene3xTermVectorsReader.FORMAT_CURRENT);

        // Term vector documents output.
        string documentsFileName = IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION);
        Tvd = directory.CreateOutput(documentsFileName, context);
        Tvd.WriteInt(Lucene3xTermVectorsReader.FORMAT_CURRENT);

        // Term vector fields output.
        string fieldsFileName = IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION);
        Tvf = directory.CreateOutput(fieldsFileName, context);
        Tvf.WriteInt(Lucene3xTermVectorsReader.FORMAT_CURRENT);

        allOutputsReady = true;
    }
    finally
    {
        // Only clean up when construction did not run to completion.
        if (!allOutputsReady)
        {
            Abort();
        }
    }
}
/// <summary>
/// Writes a single position delta to <c>Tvf</c>. When payloads are enabled the
/// delta is shifted left by one bit; the low bit is set whenever the payload
/// length differs from the previously written one, in which case the new length
/// is written right after. Non-empty payload bytes are appended to
/// <c>PayloadData</c> rather than written immediately.
/// </summary>
/// <param name="delta">Position delta to write.</param>
/// <param name="payload">Payload for this position; <c>null</c> is treated as length 0.</param>
/// <exception cref="System.NotSupportedException">
/// Thrown when appending the payload would push the buffered payload size past <c>int.MaxValue</c>.
/// </exception>
private void WritePosition(int delta, BytesRef payload)
{
    if (!Payloads)
    {
        // No payloads: the delta is written unmodified.
        Tvf.WriteVInt(delta);
        return;
    }

    int payloadLength = payload == null ? 0 : payload.Length;
    if (payloadLength != LastPayloadLength)
    {
        // Low bit set => an explicit payload length follows.
        LastPayloadLength = payloadLength;
        Tvf.WriteVInt((delta << 1) | 1);
        Tvf.WriteVInt(payloadLength);
    }
    else
    {
        Tvf.WriteVInt(delta << 1);
    }

    if (payloadLength > 0)
    {
        // The overflow test relies on the sum wrapping to a negative value, so
        // force unchecked arithmetic: in a build with overflow checking enabled
        // the bare addition would raise OverflowException before this guard
        // could throw the intended NotSupportedException.
        if (unchecked(payloadLength + PayloadData.Length) < 0)
        {
            // we overflowed the payload buffer, just throw UOE
            // having > Integer.MAX_VALUE bytes of payload for a single term in a single doc is nuts.
            throw new System.NotSupportedException("A term cannot have more than Integer.MAX_VALUE bytes of payload data in a single document");
        }
        PayloadData.Append(payload);
    }
}
/// <summary>
/// Records one occurrence of the current term. With positions enabled the
/// position delta (and payload) goes through <c>WritePosition</c>; offsets are
/// then buffered instead of written, and the buffered-entry counter advances
/// whenever offsets or payloads are enabled. With offsets only, the offset
/// deltas are written straight to <c>Tvf</c>.
/// </summary>
/// <param name="position">Position of this occurrence.</param>
/// <param name="startOffset">Start offset of this occurrence.</param>
/// <param name="endOffset">End offset of this occurrence.</param>
/// <param name="payload">Payload of this occurrence, forwarded to <c>WritePosition</c>.</param>
public override void AddPosition(int position, int startOffset, int endOffset, BytesRef payload)
{
    if (Positions)
    {
        // write position delta
        WritePosition(position - LastPosition, payload);
        LastPosition = position;

        if (Offsets)
        {
            // buffer offsets
            OffsetStartBuffer[BufferedIndex] = startOffset;
            OffsetEndBuffer[BufferedIndex] = endOffset;
        }

        // The counter only moves when something is being buffered for this term.
        if (Offsets || Payloads)
        {
            BufferedIndex++;
        }
        return;
    }

    if (Offsets)
    {
        // write offset deltas
        Tvf.WriteVInt(startOffset - LastOffset);
        Tvf.WriteVInt(endOffset - startOffset);
        LastOffset = endOffset;
    }
}
/// <summary>
/// Begins a new vector field for the current document: remembers the field's
/// flags, resets per-field state, records the field's start pointer in
/// <c>Fps</c>, writes the field number to <c>Tvd</c>, and writes the term count
/// and flag byte to <c>Tvf</c>.
/// </summary>
/// <param name="info">Field being started; its name and number are used.</param>
/// <param name="numTerms">Number of terms, written to <c>Tvf</c>.</param>
/// <param name="positions">Whether positions are stored for this field.</param>
/// <param name="offsets">Whether offsets are stored for this field.</param>
/// <param name="payloads">Whether payloads are stored for this field.</param>
public override void StartField(FieldInfo info, int numTerms, bool positions, bool offsets, bool payloads)
{
    // Field names must arrive in strictly increasing order. Compare ordinally:
    // string.CompareTo is culture-sensitive in .NET and could make this
    // assertion fire (or stay silent) depending on the current culture.
    Debug.Assert(LastFieldName == null || string.CompareOrdinal(info.Name, LastFieldName) > 0, "fieldName=" + info.Name + " lastFieldName=" + LastFieldName);
    LastFieldName = info.Name;

    this.Positions = positions;
    this.Offsets = offsets;
    this.Payloads = payloads;

    LastTerm.Length = 0;
    LastPayloadLength = -1; // force first payload to write its length

    Fps[FieldCount++] = Tvf.FilePointer;
    Tvd.WriteVInt(info.Number);
    Tvf.WriteVInt(numTerms);

    // Build the per-field flag byte from the enabled features.
    sbyte bits = 0x0;
    if (positions)
    {
        bits |= Lucene40TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
    }
    if (offsets)
    {
        bits |= Lucene40TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
    }
    if (payloads)
    {
        bits |= Lucene40TermVectorsReader.STORE_PAYLOAD_WITH_TERMVECTOR;
    }
    Tvf.WriteByte(bits);
}
/// <summary>
/// Begins a new document: writes the current <c>Tvd</c> and <c>Tvf</c> file
/// pointers to <c>Tvx</c>, writes the number of vector fields to <c>Tvd</c>,
/// and resets the per-document field bookkeeping.
/// </summary>
/// <param name="numVectorFields">Number of fields with term vectors in this document.</param>
public override void StartDocument(int numVectorFields)
{
    LastFieldName = null;
    this.NumVectorFields = numVectorFields;

    // Index entry for this document: where its data starts in each stream.
    long documentsPointer = Tvd.GetFilePointer();
    long fieldsPointer = Tvf.GetFilePointer();
    Tvx.WriteInt64(documentsPointer);
    Tvx.WriteInt64(fieldsPointer);

    Tvd.WriteVInt32(numVectorFields);

    // Make room for one start pointer per field of this document.
    FieldCount = 0;
    Fps = ArrayUtil.Grow(Fps, numVectorFields);
}
/// <summary>
/// Bulk-copies <paramref name="numDocs"/> documents from <paramref name="reader"/>
/// into our streams. This is used to expedite merging, if the
/// field numbers are congruent.
/// </summary>
/// <param name="reader">Reader whose <c>TvdStream</c> and <c>TvfStream</c> supply the raw bytes.</param>
/// <param name="tvdLengths">Per-document byte lengths in the documents stream.</param>
/// <param name="tvfLengths">Per-document byte lengths in the fields stream.</param>
/// <param name="numDocs">Number of documents to copy.</param>
private void AddRawDocuments(Lucene40TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
{
    long tvdStart = Tvd.FilePointer;
    long tvfStart = Tvf.FilePointer;

    // Running end positions; each document's index entry is the position
    // reached before its own bytes are accounted for.
    long tvdEnd = tvdStart;
    long tvfEnd = tvfStart;
    for (int doc = 0; doc < numDocs; doc++)
    {
        Tvx.WriteLong(tvdEnd);
        tvdEnd += tvdLengths[doc];
        Tvx.WriteLong(tvfEnd);
        tvfEnd += tvfLengths[doc];
    }

    long tvdByteCount = tvdEnd - tvdStart;
    long tvfByteCount = tvfEnd - tvfStart;
    Tvd.CopyBytes(reader.TvdStream, tvdByteCount);
    Tvf.CopyBytes(reader.TvfStream, tvfByteCount);

    // After the copy both outputs must sit exactly at the computed end positions.
    Debug.Assert(Tvd.FilePointer == tvdEnd);
    Debug.Assert(Tvf.FilePointer == tvfEnd);
}
// Reusable payload buffer; used only by the optimized flush (AddProx) below.
internal BytesRef Scratch = new BytesRef();

/// <summary>
/// Copies <paramref name="numProx"/> already-encoded position and offset
/// entries into <c>Tvf</c>. With payloads enabled each position code is decoded
/// and passed through <c>WritePosition</c>, after which the buffered payload
/// bytes are written. Without payloads the position codes are shifted right by
/// one bit and written directly. Offsets, when supplied, are copied as pairs of
/// VInts.
/// </summary>
/// <param name="numProx">Number of entries to copy.</param>
/// <param name="positions">Source of encoded positions; checked for <c>null</c> only when payloads are disabled.</param>
/// <param name="offsets">Source of encoded offsets; may be <c>null</c>.</param>
public override void AddProx(int numProx, DataInput positions, DataInput offsets)
{
    if (Payloads)
    {
        // TODO, maybe overkill and just call super.addProx() in this case?
        // we do avoid buffering the offsets in RAM though.
        for (int n = 0; n < numProx; n++)
        {
            int code = positions.ReadVInt();
            int delta = (int)((uint)code >> 1);

            BytesRef payload = null;
            if ((code & 1) != 0)
            {
                // Low bit set: a payload length and the payload bytes follow.
                int length = positions.ReadVInt();
                Scratch.Grow(length);
                Scratch.Length = length;
                positions.ReadBytes(Scratch.Bytes, Scratch.Offset, Scratch.Length);
                payload = Scratch;
            }

            WritePosition(delta, payload);
        }

        Tvf.WriteBytes(PayloadData.Bytes, PayloadData.Offset, PayloadData.Length);
    }
    else if (positions != null)
    {
        // pure positions, no payloads
        for (int n = 0; n < numProx; n++)
        {
            int code = positions.ReadVInt();
            Tvf.WriteVInt((int)((uint)code >> 1));
        }
    }

    if (offsets != null)
    {
        // Each entry is two VInts, copied through in the order they are read.
        for (int n = 0; n < numProx; n++)
        {
            Tvf.WriteVInt(offsets.ReadVInt());
            Tvf.WriteVInt(offsets.ReadVInt());
        }
    }
}
/// <summary>
/// Begins a new term: writes it to <c>Tvf</c> as the length of the prefix shared
/// with the previous term, the suffix length, the suffix bytes and the
/// frequency, then resets the per-term position/offset state. When both offsets
/// and positions are enabled the offset buffers are grown to hold
/// <paramref name="freq"/> entries.
/// </summary>
/// <param name="term">Bytes of the term being started.</param>
/// <param name="freq">Frequency of the term, written to <c>Tvf</c>.</param>
public override void StartTerm(BytesRef term, int freq)
{
    int sharedPrefixLength = StringHelper.BytesDifference(LastTerm, term);
    int suffixLength = term.Length - sharedPrefixLength;

    Tvf.WriteVInt(sharedPrefixLength);
    Tvf.WriteVInt(suffixLength);
    Tvf.WriteBytes(term.Bytes, term.Offset + sharedPrefixLength, suffixLength);
    Tvf.WriteVInt(freq);

    // Remember this term so the next one can be prefix-coded against it.
    LastTerm.CopyBytes(term);

    LastOffset = 0;
    LastPosition = 0;

    if (!Offsets || !Positions)
    {
        return;
    }

    // we might need to buffer if its a non-bulk merge
    OffsetStartBuffer = ArrayUtil.Grow(OffsetStartBuffer, freq);
    OffsetEndBuffer = ArrayUtil.Grow(OffsetEndBuffer, freq);
    OffsetIndex = 0;
    OffsetFreq = freq;
}
/// <summary>
/// Records one occurrence of the current term. Position deltas are written to
/// <c>Tvf</c> immediately. When positions and offsets are both enabled the
/// offsets are buffered and written out as deltas once <c>OffsetFreq</c>
/// occurrences have been seen. With offsets only, the offset deltas are written
/// immediately.
/// </summary>
/// <param name="position">Position of this occurrence.</param>
/// <param name="startOffset">Start offset of this occurrence.</param>
/// <param name="endOffset">End offset of this occurrence.</param>
/// <param name="payload">Asserted to be <c>null</c>.</param>
public override void AddPosition(int position, int startOffset, int endOffset, BytesRef payload)
{
    Debug.Assert(payload == null);

    if (Positions)
    {
        // write position delta
        Tvf.WriteVInt(position - LastPosition);
        LastPosition = position;

        if (!Offsets)
        {
            return;
        }

        // buffer offsets
        OffsetStartBuffer[OffsetIndex] = startOffset;
        OffsetEndBuffer[OffsetIndex] = endOffset;
        OffsetIndex++;

        if (OffsetIndex != OffsetFreq)
        {
            return;
        }

        // dump buffer: this was the last occurrence of the term
        for (int k = 0; k < OffsetIndex; k++)
        {
            int bufferedStart = OffsetStartBuffer[k];
            int bufferedEnd = OffsetEndBuffer[k];
            Tvf.WriteVInt(bufferedStart - LastOffset);
            Tvf.WriteVInt(bufferedEnd - bufferedStart);
            LastOffset = bufferedEnd;
        }
    }
    else if (Offsets)
    {
        // write offset deltas
        Tvf.WriteVInt(startOffset - LastOffset);
        Tvf.WriteVInt(endOffset - startOffset);
        LastOffset = endOffset;
    }
}
/// <summary>
/// Finishes the current term. If anything was buffered for it, writes the
/// buffered payload bytes (when payloads are enabled) followed by the buffered
/// offsets as deltas (when offsets are enabled) to <c>Tvf</c>.
/// </summary>
public override void FinishTerm()
{
    if (BufferedIndex <= 0)
    {
        // Nothing was buffered for this term.
        return;
    }

    // dump buffer
    Debug.Assert(Positions && (Offsets || Payloads));
    Debug.Assert(BufferedIndex == BufferedFreq);

    if (Payloads)
    {
        Tvf.WriteBytes(PayloadData.Bytes, PayloadData.Offset, PayloadData.Length);
    }

    if (Offsets)
    {
        for (int k = 0; k < BufferedIndex; k++)
        {
            int bufferedStart = OffsetStartBuffer[k];
            int bufferedEnd = OffsetEndBuffer[k];
            Tvf.WriteVInt(bufferedStart - LastOffset);
            Tvf.WriteVInt(bufferedEnd - bufferedStart);
            LastOffset = bufferedEnd;
        }
    }
}
/// <summary>
/// Begins a new vector field for the current document: records the field's
/// start pointer in <c>Fps</c>, writes the field number to <c>Tvd</c>, and
/// writes the term count and flag byte to <c>Tvf</c>. Once the last field of the
/// document has been started, the differences between consecutive field start
/// pointers are written to <c>Tvd</c>.
/// </summary>
/// <param name="info">Field being started; its name and number are used.</param>
/// <param name="numTerms">Number of terms, written to <c>Tvf</c>.</param>
/// <param name="positions">Whether positions are stored for this field.</param>
/// <param name="offsets">Whether offsets are stored for this field.</param>
/// <param name="payloads">Must be <c>false</c>.</param>
/// <exception cref="System.NotSupportedException">Thrown when <paramref name="payloads"/> is <c>true</c>.</exception>
public override void StartField(FieldInfo info, int numTerms, bool positions, bool offsets, bool payloads)
{
    // Field names must arrive in strictly increasing order. Compare ordinally:
    // string.CompareTo is culture-sensitive in .NET and could make this
    // assertion fire (or stay silent) depending on the current culture.
    Debug.Assert(LastFieldName == null || string.CompareOrdinal(info.Name, LastFieldName) > 0, "fieldName=" + info.Name + " lastFieldName=" + LastFieldName);
    LastFieldName = info.Name;

    if (payloads)
    {
        throw new System.NotSupportedException("3.x codec does not support payloads on vectors!");
    }

    this.Positions = positions;
    this.Offsets = offsets;
    LastTerm.Length = 0;

    Fps[FieldCount++] = Tvf.FilePointer;
    Tvd.WriteVInt(info.Number);
    Tvf.WriteVInt(numTerms);

    // Build the per-field flag byte from the enabled features.
    sbyte bits = 0x0;
    if (positions)
    {
        bits |= Lucene3xTermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
    }
    if (offsets)
    {
        bits |= Lucene3xTermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
    }
    Tvf.WriteByte(bits);

    Debug.Assert(FieldCount <= NumVectorFields);
    if (FieldCount == NumVectorFields)
    {
        // last field of the document
        // this is crazy because the file format is crazy!
        for (int i = 1; i < FieldCount; i++)
        {
            Tvd.WriteVLong(Fps[i] - Fps[i - 1]);
        }
    }
}