/// <summary>
/// Loads the segment information at segment load time.
/// </summary>
/// <param name="indexEnum">
///          The term enum. </param>
/// <param name="indexDivisor">
///          The index divisor. </param>
/// <param name="tiiFileLength">
///          The size of the tii file, used to approximate the size of the
///          buffer. </param>
/// <param name="totalIndexInterval">
///          The total index interval. </param>
public TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval)
{
    this.totalIndexInterval = totalIndexInterval;
    indexSize = 1 + ((int)indexEnum.size - 1) / indexDivisor;
    skipInterval = indexEnum.skipInterval;
    // this is only an initial size, it will be GCed once the build is complete
    long initialSize = (long)(tiiFileLength * 1.5) / indexDivisor;
    PagedBytes dataPagedBytes = new PagedBytes(EstimatePageBits(initialSize));
    PagedBytesDataOutput dataOutput = dataPagedBytes.GetDataOutput();

    int bitEstimate = 1 + MathUtil.Log(tiiFileLength, 2);
    GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInt32s.DEFAULT);

    string currentField = null;
    IList<string> fieldStrs = new List<string>();
    int fieldCounter = -1;
    for (int i = 0; indexEnum.Next(); i++)
    {
        Term term = indexEnum.Term();
        if (currentField == null || !currentField.Equals(term.Field, StringComparison.Ordinal))
        {
            currentField = term.Field;
            fieldStrs.Add(currentField);
            fieldCounter++;
        }
        TermInfo termInfo = indexEnum.TermInfo();
        indexToTerms.Set(i, dataOutput.GetPosition());
        // entry layout: field ordinal, term text, docFreq, optional skipOffset, freq/prox/index pointers
        dataOutput.WriteVInt32(fieldCounter);
        dataOutput.WriteString(term.Text());
        dataOutput.WriteVInt32(termInfo.DocFreq);
        if (termInfo.DocFreq >= skipInterval)
        {
            dataOutput.WriteVInt32(termInfo.SkipOffset);
        }
        dataOutput.WriteVInt64(termInfo.FreqPointer);
        dataOutput.WriteVInt64(termInfo.ProxPointer);
        dataOutput.WriteVInt64(indexEnum.indexPointer);
        // skip the next indexDivisor - 1 index terms; only every indexDivisor-th entry is retained
        for (int j = 1; j < indexDivisor; j++)
        {
            if (!indexEnum.Next())
            {
                break;
            }
        }
    }

    fields = new Term[fieldStrs.Count];
    for (int i = 0; i < fields.Length; i++)
    {
        fields[i] = new Term(fieldStrs[i]);
    }

    dataPagedBytes.Freeze(true);
    dataInput = dataPagedBytes.GetDataInput();
    indexToDataOffset = indexToTerms.Mutable;

    ramBytesUsed = fields.Length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.ShallowSizeOfInstance(typeof(Term)))
        + dataPagedBytes.RamBytesUsed()
        + indexToDataOffset.RamBytesUsed();
}
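// A minimal standalone sketch (not part of TermInfosReaderIndex) of the index-divisor
// sampling the constructor above relies on: only every indexDivisor-th indexed term is
// retained, so the number of in-memory entries is 1 + (size - 1) / indexDivisor while
// the effective scan interval (totalIndexInterval) grows by the same factor. The class
// and method names below are hypothetical and exist only for illustration.
internal static class IndexDivisorSamplingSketch
{
    // Number of index entries kept when every indexDivisor-th indexed term is sampled.
    internal static int SampledIndexSize(int totalIndexTerms, int indexDivisor)
    {
        return 1 + (totalIndexTerms - 1) / indexDivisor;
    }

    // Example: 1,000,000 indexed terms with indexDivisor = 4 keeps 250,000 entries,
    // trading some lookup speed for roughly a quarter of the index RAM.
}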
public virtual void Seek(SegmentTermEnum segmentTermEnum)
{
    TermInfo ti;
    Term term;

    // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
    if (segmentTermEnum.FieldInfos == FieldInfos) // optimized case
    {
        term = segmentTermEnum.Term();
        ti = segmentTermEnum.TermInfo();
    } // punt case
    else
    {
        term = segmentTermEnum.Term();
        ti = Tis.Get(term);
    }

    Seek(ti, term);
}
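// A hypothetical sketch (not Lucene.NET API) of the pattern Seek relies on above:
// when the incoming enum shares this reader's FieldInfos (i.e. it comes from the same
// segment), its cached TermInfo is reused directly; otherwise the term is re-resolved,
// as Seek does via Tis.Get(term). All names below are illustrative stand-ins, and the
// same-segment check is modeled here as reference equality.
using System;

internal sealed class SameSegmentReuseSketch
{
    private readonly object segmentMetadata;        // stands in for FieldInfos
    private readonly Func<string, long> lookup;     // stands in for Tis.Get(term)

    internal SameSegmentReuseSketch(object segmentMetadata, Func<string, long> lookup)
    {
        this.segmentMetadata = segmentMetadata;
        this.lookup = lookup;
    }

    // Returns the cached value when the source shares our segment metadata (optimized
    // case); otherwise falls back to a full lookup by key (punt case).
    internal long Resolve(object sourceMetadata, string key, long cachedValue)
    {
        return ReferenceEquals(sourceMetadata, segmentMetadata)
            ? cachedValue
            : lookup(key);
    }
}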