/// <summary>
/// Seeks the given enumerator to the index entry stored at <paramref name="indexOffset"/>.
/// The entry's term and term info are decoded from a private clone of the
/// data input and handed to the enumerator together with its file pointer.
/// </summary>
/// <param name="enumerator">
///          The enumerator to reposition. </param>
/// <param name="indexOffset">
///          The slot of the index entry to seek to. </param>
internal virtual void SeekEnum(SegmentTermEnum enumerator, int indexOffset)
{
    PagedBytesDataInput reader = (PagedBytesDataInput)dataInput.Clone();
    reader.SetPosition(indexToDataOffset.Get(indexOffset));

    // Decode the term: a field id followed by the term text.
    int fieldOrd = reader.ReadVInt32();
    Term fieldTemplate = fields[fieldOrd];
    Term decodedTerm = new Term(fieldTemplate.Field, reader.ReadString());

    // Decode the term info; the skip offset is only stored for entries
    // whose document frequency reaches the skip interval.
    var info = new TermInfo();
    info.DocFreq = reader.ReadVInt32();
    info.SkipOffset = info.DocFreq >= skipInterval ? reader.ReadVInt32() : 0;
    info.FreqPointer = reader.ReadVInt64();
    info.ProxPointer = reader.ReadVInt64();
    long filePointer = reader.ReadVInt64();

    // Widen before multiplying so the position is computed in 64 bits.
    long enumPosition = ((long)indexOffset * totalIndexInterval) - 1;
    enumerator.Seek(filePointer, enumPosition, decodedTerm, info);
}
/// <summary>
/// Binary search for the given term.
/// </summary>
/// <param name="term">
///          The term to locate. </param>
/// <returns> The slot of the entry that compares equal to <paramref name="term"/>;
///          otherwise the upper bound the search ended on, which is -1 when
///          the term compares lower than every entry probed. </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
internal virtual int GetIndexOffset(Term term)
{
    PagedBytesDataInput reader = (PagedBytesDataInput)dataInput.Clone();
    BytesRef buffer = new BytesRef();

    int low = 0;
    int high = indexSize - 1;
    while (low <= high)
    {
        // Unsigned shift keeps the midpoint correct even if low + high overflows.
        int middle = (int)((uint)(low + high) >> 1);
        int order = CompareTo(term, middle, reader, buffer);
        if (order == 0)
        {
            return middle;
        }

        if (order < 0)
        {
            high = middle - 1;
        }
        else
        {
            low = middle + 1;
        }
    }

    return high;
}
/// <summary>
/// Returns a data input obtained from the outer instance, moved to the
/// position this input currently reports.
/// </summary>
/// <returns> The repositioned data input. </returns>
public override object Clone()
{
    PagedBytesDataInput copy = outerInstance.GetDataInput();
    copy.SetPosition(GetPosition());
    return copy;
}
/// <summary>
/// Returns the data input exposed by the outer instance, moved to the
/// position this input currently reports.
/// </summary>
/// <returns> The repositioned data input. </returns>
public override object Clone()
{
    PagedBytesDataInput copy = OuterInstance.DataInput;
    copy.Position = Position;
    return copy;
}
/// <summary>
/// Loads the segment information at segment load time.
/// <para/>
/// Walks <paramref name="indexEnum"/>, keeps one entry out of every
/// <paramref name="indexDivisor"/>, and serializes each kept entry
/// (field id, term text, term info and index pointer) into paged bytes.
/// </summary>
/// <param name="indexEnum">
///          The term enum. </param>
/// <param name="indexDivisor">
///          The index divisor. </param>
/// <param name="tiiFileLength">
///          The size of the tii file, used to approximate the size of the
///          buffer. </param>
/// <param name="totalIndexInterval">
///          The total index interval. </param>
public TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval)
{
    this.totalIndexInterval = totalIndexInterval;
    indexSize = 1 + ((int)indexEnum.size - 1) / indexDivisor;
    skipInterval = indexEnum.skipInterval;

    // This is only an initial size; it will be GCed once the build is complete.
    long initialSize = (long)(tiiFileLength * 1.5) / indexDivisor;
    PagedBytes pagedData = new PagedBytes(EstimatePageBits(initialSize));
    PagedBytesDataOutput writer = pagedData.GetDataOutput();

    int bitEstimate = 1 + MathUtil.Log(tiiFileLength, 2);
    GrowableWriter offsets = new GrowableWriter(bitEstimate, indexSize, PackedInt32s.DEFAULT);

    IList<string> fieldNames = new List<string>();
    string activeField = null;
    int fieldOrd = -1;
    int slot = 0;
    while (indexEnum.Next())
    {
        Term current = indexEnum.Term();

        // A change of field name starts a new field id.
        bool startsNewField = activeField == null
            || !activeField.Equals(current.Field, StringComparison.Ordinal);
        if (startsNewField)
        {
            activeField = current.Field;
            fieldNames.Add(activeField);
            fieldOrd++;
        }

        TermInfo info = indexEnum.TermInfo();
        offsets.Set(slot, writer.GetPosition());
        writer.WriteVInt32(fieldOrd);
        writer.WriteString(current.Text());
        writer.WriteVInt32(info.DocFreq);
        if (info.DocFreq >= skipInterval)
        {
            // The skip offset is only written for entries that reach the skip interval.
            writer.WriteVInt32(info.SkipOffset);
        }
        writer.WriteVInt64(info.FreqPointer);
        writer.WriteVInt64(info.ProxPointer);
        writer.WriteVInt64(indexEnum.indexPointer);

        // Step over the entries dropped by the divisor, stopping early
        // if the enumeration runs out.
        int step = 1;
        while (step < indexDivisor && indexEnum.Next())
        {
            step++;
        }

        slot++;
    }

    Term[] fieldTerms = new Term[fieldNames.Count];
    for (int ord = 0; ord < fieldTerms.Length; ord++)
    {
        fieldTerms[ord] = new Term(fieldNames[ord]);
    }
    fields = fieldTerms;

    pagedData.Freeze(true);
    dataInput = pagedData.GetDataInput();
    indexToDataOffset = offsets.Mutable;

    var bytesPerField = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.ShallowSizeOfInstance(typeof(Term));
    var fieldBytes = fields.Length * bytesPerField;
    ramBytesUsed = fieldBytes + pagedData.RamBytesUsed() + indexToDataOffset.RamBytesUsed();
}
/// <summary>
/// Gets the term at the given position. For testing.
/// </summary>
/// <param name="termIndex">
///          The position to read the term from the index. </param>
/// <returns> The term. </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
internal virtual Term GetTerm(int termIndex)
{
    PagedBytesDataInput reader = (PagedBytesDataInput)dataInput.Clone();
    reader.SetPosition(indexToDataOffset.Get(termIndex));

    // An entry starts with its field id, followed by the term text.
    int fieldOrd = reader.ReadVInt32();
    Term fieldTemplate = fields[fieldOrd];
    return new Term(fieldTemplate.Field, reader.ReadString());
}
/// <summary>
/// Gets the term at the given position. For testing.
/// </summary>
/// <param name="termIndex">
///          The position to read the term from the index. </param>
/// <returns> The term. </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public virtual Term GetTerm(int termIndex)
{
    PagedBytesDataInput reader = (PagedBytesDataInput)DataInput.Clone();
    reader.Position = IndexToDataOffset.Get(termIndex);

    // An entry starts with its field id, followed by the term text.
    int fieldOrd = reader.ReadVInt();
    Term fieldTemplate = Fields[fieldOrd];
    return new Term(fieldTemplate.Field, reader.ReadString());
}
/// <summary>
/// Compares the fields of the terms first and returns that result when
/// they differ. When the fields are equal, the stored term bytes are
/// read and compared against the given term's bytes.
/// </summary>
/// <param name="term">
///          The term to compare. </param>
/// <param name="termIndex">
///          The position of the term in the input to compare. </param>
/// <param name="input">
///          The input buffer. </param>
/// <param name="reuse">
///          Scratch buffer that receives the stored term bytes; its
///          length and contents are overwritten. </param>
/// <returns> int. </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
private int CompareTo(Term term, int termIndex, PagedBytesDataInput input, BytesRef reuse)
{
    int fieldOrder = CompareField(term, termIndex, input);
    if (fieldOrder != 0)
    {
        // Different fields: the field order decides.
        return fieldOrder;
    }

    // Same field: the input now sits on the length-prefixed term bytes.
    reuse.Length = input.ReadVInt32();
    reuse.Grow(reuse.Length);
    input.ReadBytes(reuse.Bytes, 0, reuse.Length);
    return comparer.Compare(term.Bytes, reuse);
}
/// <summary>
/// Compares the fields before checking the text of the terms.
/// Moves <paramref name="input"/> to the entry at <paramref name="termIndex"/>
/// and consumes its field id.
/// </summary>
/// <param name="term">
///          The given term. </param>
/// <param name="termIndex">
///          The term that exists in the data block. </param>
/// <param name="input">
///          The data block. </param>
/// <returns> int. </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
private int CompareField(Term term, int termIndex, PagedBytesDataInput input)
{
    input.SetPosition(indexToDataOffset.Get(termIndex));

    string requestedField = term.Field;
    string storedField = fields[input.ReadVInt32()].Field;
    return requestedField.CompareToOrdinal(storedField);
}
/// <summary>
/// Compares the fields before checking the text of the terms.
/// Moves <paramref name="input"/> to the entry at <paramref name="termIndex"/>
/// and consumes its field id; the field names are compared ordinally.
/// </summary>
/// <param name="term">
///          The given term. </param>
/// <param name="termIndex">
///          The term that exists in the data block. </param>
/// <param name="input">
///          The data block. </param>
/// <returns> int. </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
private int CompareField(Term term, int termIndex, PagedBytesDataInput input)
{
    input.Position = IndexToDataOffset.Get(termIndex);

    string requestedField = term.Field;
    string storedField = Fields[input.ReadVInt()].Field;
    return string.Compare(requestedField, storedField, System.StringComparison.Ordinal);
}
/// <summary>
/// Compares the fields before checking the text of the terms.
/// Moves <paramref name="input"/> to the entry at <paramref name="termIndex"/>
/// and consumes its field id.
/// </summary>
/// <param name="term">
///          The given term. </param>
/// <param name="termIndex">
///          The term that exists in the data block. </param>
/// <param name="input">
///          The data block. </param>
/// <returns> A negative value, zero, or a positive value when the given
///          term's field sorts before, equal to, or after the stored field. </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
private int CompareField(Term term, int termIndex, PagedBytesDataInput input)
{
    input.Position = IndexToDataOffset.Get(termIndex);

    // Field names are identifiers, not linguistic text, so compare them
    // ordinally. string.CompareTo uses the current culture, which can order
    // names differently from machine to machine and mislead the binary
    // search that relies on this comparison.
    return string.CompareOrdinal(term.Field(), Fields[input.ReadVInt()].Field());
}