/// <summary>
/// Orders <paramref name="term"/> against the indexed entry at
/// <paramref name="termIndex"/>. The field comparison is done first; the
/// term bytes are only read and compared when the fields are equal.
/// </summary>
/// <param name="term">
///          the term to compare. </param>
/// <param name="termIndex">
///          the position of the indexed term to compare against. </param>
/// <param name="input">
///          the input buffer; it is repositioned by <c>CompareField</c> and then read from. </param>
/// <param name="reuse">
///          scratch buffer that receives the indexed term's bytes when the fields match. </param>
/// <returns> the non-zero result of the field comparison, otherwise the result of
///          <c>Comparator.Compare(term.Bytes, reuse)</c>. </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
private int CompareTo(Term term, int termIndex, PagedBytesDataInput input, BytesRef reuse)
{
    // Different fields decide the ordering on their own; no need to read the term bytes.
    int fieldOrder = CompareField(term, termIndex, input);
    if (fieldOrder != 0)
    {
        return fieldOrder;
    }

    // Same field: the input now sits right after the field number, where the
    // length-prefixed term bytes start. Load them into the scratch buffer.
    reuse.Length = input.ReadVInt();
    reuse.Grow(reuse.Length);
    input.ReadBytes(reuse.Bytes, 0, reuse.Length);

    return Comparator.Compare(term.Bytes, reuse);
}
/// <summary>
/// Builds the in-memory term index from the given term enum at segment load time.
/// Every <paramref name="indexDivisor"/>-th entry of the enum is serialized into a
/// paged byte buffer, and the start offset of each serialized entry is recorded.
/// </summary>
/// <param name="indexEnum">
///          the term enum to read the index entries from. </param>
/// <param name="indexDivisor">
///          the index divisor; only one out of this many enum entries is kept. </param>
/// <param name="tiiFileLength">
///          the size of the tii file, used to approximate the size of the
///          buffer. </param>
/// <param name="totalIndexInterval">
///          the total index interval. </param>
public TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval)
{
    this.TotalIndexInterval = totalIndexInterval;
    IndexSize = 1 + ((int)indexEnum.Size - 1) / indexDivisor;
    SkipInterval = indexEnum.SkipInterval;

    // This is only an initial size; it will be GCed once the build is complete.
    long estimatedBytes = (long)(tiiFileLength * 1.5) / indexDivisor;
    PagedBytes pagedData = new PagedBytes(EstimatePageBits(estimatedBytes));
    PagedBytesDataOutput output = pagedData.DataOutput;

    // Starting bit width for the offsets table, derived from the tii file length.
    int offsetBits = 1 + MathUtil.Log(tiiFileLength, 2);
    GrowableWriter offsets = new GrowableWriter(offsetBits, IndexSize, PackedInts.DEFAULT);

    string previousField = null;
    IList<string> fieldNames = new List<string>();
    int fieldNumber = -1;
    int slot = 0;

    while (indexEnum.Next())
    {
        Term term = indexEnum.Term();
        string field = term.Field;

        // A change of field name starts a new field number; each distinct run of
        // field names is stored once and referenced by number from the entries.
        bool sameField = previousField != null && previousField.Equals(field);
        if (!sameField)
        {
            previousField = field;
            fieldNames.Add(field);
            fieldNumber++;
        }

        TermInfo termInfo = indexEnum.TermInfo();

        // Remember where this entry starts, then serialize it:
        // field number, term text, doc freq, [skip offset], freq/prox/index pointers.
        offsets.Set(slot, output.Position);
        output.WriteVInt(fieldNumber);
        output.WriteString(term.Text());
        output.WriteVInt(termInfo.DocFreq);
        // The skip offset is only written when the doc freq reaches the skip interval.
        if (termInfo.DocFreq >= SkipInterval)
        {
            output.WriteVInt(termInfo.SkipOffset);
        }
        output.WriteVLong(termInfo.FreqPointer);
        output.WriteVLong(termInfo.ProxPointer);
        output.WriteVLong(indexEnum.IndexPointer);

        // Step over the next (indexDivisor - 1) entries, stopping early if the enum runs out.
        for (int skipped = 1; skipped < indexDivisor && indexEnum.Next(); skipped++)
        {
            // nothing to do; advancing the enum is the whole point
        }

        slot++;
    }

    // One Term per recorded field name, indexed by the field number written above.
    Term[] fieldTerms = new Term[fieldNames.Count];
    for (int n = 0; n < fieldTerms.Length; n++)
    {
        fieldTerms[n] = new Term(fieldNames[n]);
    }
    Fields = fieldTerms;

    pagedData.Freeze(true);
    DataInput = pagedData.DataInput;
    IndexToDataOffset = offsets.Mutable;

    // RAM accounting: the field terms (reference + shallow instance size each),
    // plus the paged data, plus the offsets table.
    var fieldTermsBytes = Fields.Length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.ShallowSizeOfInstance(typeof(Term)));
    RamBytesUsed_Renamed = fieldTermsBytes + pagedData.RamBytesUsed() + IndexToDataOffset.RamBytesUsed();
}
/// <summary>
/// Compares the field of <paramref name="term"/> with the field of the indexed
/// entry at <paramref name="termIndex"/>, using ordinal string comparison.
/// On return, <paramref name="input"/> is positioned just past the entry's
/// field number.
/// </summary>
/// <param name="term">
///          the given term. </param>
/// <param name="termIndex">
///          the position of the indexed entry whose field is compared. </param>
/// <param name="input">
///          the data block; it is repositioned to the start of the entry. </param>
/// <returns> the result of the ordinal comparison of the two field names. </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
private int CompareField(Term term, int termIndex, PagedBytesDataInput input)
{
    // Jump to the start of the serialized entry for this index slot.
    input.Position = IndexToDataOffset.Get(termIndex);

    string givenField = term.Field;

    // The entry begins with the number of its field; resolve it to the field name.
    int fieldNumber = input.ReadVInt();
    string indexedField = Fields[fieldNumber].Field;

    return string.Compare(givenField, indexedField, System.StringComparison.Ordinal);
}